Merge pull request #80 from shadps4-emu/linux

common: Rework timekeeping with native RDTSC and port to linux
This commit is contained in:
georgemoralis 2024-02-23 14:45:05 +02:00 committed by GitHub
commit 32a5ff15bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
37 changed files with 819 additions and 280 deletions

7
.gitmodules vendored
View File

@ -33,11 +33,10 @@
path = third-party/toml11 path = third-party/toml11
url = https://github.com/ToruNiina/toml11 url = https://github.com/ToruNiina/toml11
branch = master branch = master
[submodule "third-party/vulkan"]
path = third-party/vulkan
url = https://github.com/shadps4/vulkan.git
branch = main
[submodule "third-party/xxHash"] [submodule "third-party/xxHash"]
path = third-party/xxHash path = third-party/xxHash
url = https://github.com/Cyan4973/xxHash.git url = https://github.com/Cyan4973/xxHash.git
branch = dev branch = dev
[submodule "third-party/vulkan"]
path = third-party/vulkan
url = https://github.com/GPUCode/vulkan

View File

@ -30,8 +30,8 @@ endfunction()
add_subdirectory(third-party) add_subdirectory(third-party)
include_directories(src) include_directories(src)
set(LIBC_SOURCES src/core/hle/libraries/libc/Libc.cpp set(LIBC_SOURCES src/core/hle/libraries/libc/libc.cpp
src/core/hle/libraries/libc/Libc.h src/core/hle/libraries/libc/libc.h
src/core/hle/libraries/libc/printf.h src/core/hle/libraries/libc/printf.h
src/core/hle/libraries/libc/va_ctx.h src/core/hle/libraries/libc/va_ctx.h
src/core/hle/libraries/libc/libc_cxa.cpp src/core/hle/libraries/libc/libc_cxa.cpp
@ -77,12 +77,15 @@ add_executable(shadps4
src/common/fs_file.h src/common/fs_file.h
src/common/log.cpp src/common/log.cpp
src/common/log.h src/common/log.h
src/common/native_clock.cpp
src/common/native_clock.h
src/common/rdtsc.cpp
src/common/rdtsc.h
src/common/singleton.h src/common/singleton.h
src/common/string_util.cpp src/common/string_util.cpp
src/common/string_util.h src/common/string_util.h
src/common/timer.cpp
src/common/timer.h
src/common/types.h src/common/types.h
src/common/uint128.h
src/common/version.h src/common/version.h
${LIBC_SOURCES} ${LIBC_SOURCES}
${USERSERVICE_SOURCES} ${USERSERVICE_SOURCES}
@ -143,8 +146,6 @@ add_executable(shadps4
src/core/PS4/HLE/Graphics/graphics_render.h src/core/PS4/HLE/Graphics/graphics_render.h
src/core/PS4/GPU/tile_manager.cpp src/core/PS4/GPU/tile_manager.cpp
src/core/PS4/GPU/tile_manager.h src/core/PS4/GPU/tile_manager.h
src/emuTimer.cpp
src/emuTimer.h
src/core/hle/libraries/libkernel/time_management.cpp src/core/hle/libraries/libkernel/time_management.cpp
src/core/hle/libraries/libkernel/time_management.h src/core/hle/libraries/libkernel/time_management.h
"src/common/io_file.cpp" "src/common/io_file.h") "src/common/io_file.cpp" "src/common/io_file.h")
@ -154,13 +155,15 @@ create_target_directory_groups(shadps4)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt spdlog::spdlog toml11::toml11) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt spdlog::spdlog toml11::toml11)
target_link_libraries(shadps4 PRIVATE discord-rpc imgui SDL3-shared vulkan-1 xxhash Zydis) target_link_libraries(shadps4 PRIVATE discord-rpc imgui SDL3-shared vulkan-1 xxhash Zydis)
if (WIN32) if (WIN32)
target_link_libraries(shadps4 PRIVATE mincore winpthread) target_link_libraries(shadps4 PRIVATE mincore winpthread clang_rt.builtins-x86_64.lib)
endif() endif()
add_custom_command(TARGET shadps4 POST_BUILD add_custom_command(TARGET shadps4 POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different COMMAND ${CMAKE_COMMAND} -E copy_if_different
$<TARGET_FILE:SDL3-shared> $<TARGET_FILE:SDL3-shared>
$<TARGET_FILE_DIR:shadps4>) $<TARGET_FILE_DIR:shadps4>)
add_custom_command(TARGET shadps4 POST_BUILD if (WIN32)
add_custom_command(TARGET shadps4 POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${PROJECT_SOURCE_DIR}/third-party/winpthread/bin/libwinpthread-1.dll" $<TARGET_FILE_DIR:shadps4>) "${PROJECT_SOURCE_DIR}/third-party/winpthread/bin/libwinpthread-1.dll" $<TARGET_FILE_DIR:shadps4>)
endif()

View File

@ -109,7 +109,11 @@ int Init(bool use_stdout) {
if (use_stdout) { if (use_stdout) {
sinks.push_back(std::make_shared<spdlog::sinks::stdout_color_sink_mt>()); sinks.push_back(std::make_shared<spdlog::sinks::stdout_color_sink_mt>());
} }
#ifdef _WIN64
sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>(L"shadps4.txt", true)); sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>(L"shadps4.txt", true));
#else
sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>("shadps4.txt", true));
#endif
spdlog::set_default_logger(std::make_shared<spdlog::logger>("shadps4 logger", begin(sinks), end(sinks))); spdlog::set_default_logger(std::make_shared<spdlog::logger>("shadps4 logger", begin(sinks), end(sinks)));
auto f = std::make_unique<spdlog::pattern_formatter>("%^|%L|: %v%$", spdlog::pattern_time_type::local, std::string("")); // disable eol auto f = std::make_unique<spdlog::pattern_formatter>("%^|%L|: %v%$", spdlog::pattern_time_type::local, std::string("")); // disable eol
spdlog::set_formatter(std::move(f)); spdlog::set_formatter(std::move(f));

View File

@ -0,0 +1,43 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/uint128.h"
#include "common/native_clock.h"
#include "common/rdtsc.h"
#ifdef _WIN64
#include <pthread_time.h>
#else
#include <time.h>
#endif
namespace Common {
// Measures the TSC frequency once, then derives the fixed-point factors used to
// convert raw TSC ticks into nanoseconds, microseconds and milliseconds.
NativeClock::NativeClock()
    : rdtsc_frequency(EstimateRDTSCFrequency()),
      ns_rdtsc_factor(GetFixedPoint64Factor(std::nano::den, rdtsc_frequency)),
      us_rdtsc_factor(GetFixedPoint64Factor(std::micro::den, rdtsc_frequency)),
      ms_rdtsc_factor(GetFixedPoint64Factor(std::milli::den, rdtsc_frequency)) {
}
// Returns the current TSC reading converted to nanoseconds.
u64 NativeClock::GetTimeNS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, ns_rdtsc_factor);
}
// Returns the current TSC reading converted to microseconds.
u64 NativeClock::GetTimeUS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, us_rdtsc_factor);
}
// Returns the current TSC reading converted to milliseconds.
u64 NativeClock::GetTimeMS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, ms_rdtsc_factor);
}
// Returns the raw time-stamp counter value obtained through FencedRDTSC().
u64 NativeClock::GetUptime() const {
    const u64 tsc_ticks = FencedRDTSC();
    return tsc_ticks;
}
// Returns the CPU time consumed by the current process, in microseconds,
// as reported by clock_gettime(CLOCK_PROCESS_CPUTIME_ID).
// Returns 0 if the clock cannot be queried.
u64 NativeClock::GetProcessTimeUS() const {
    // Zero-initialise so a failed query can never expose indeterminate values.
    timespec ret{};
    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ret) != 0) {
        return 0;
    }
    // Convert both fields to unsigned 64-bit before scaling to avoid signed arithmetic.
    const u64 seconds_as_us = static_cast<u64>(ret.tv_sec) * 1000000ULL;
    const u64 nanos_as_us = static_cast<u64>(ret.tv_nsec) / 1000ULL;
    return seconds_as_us + nanos_as_us;
}
} // namespace Common

32
src/common/native_clock.h Normal file
View File

@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include "common/types.h"
namespace Common {
// Clock built on the CPU time-stamp counter (TSC). The TSC frequency is estimated
// once at construction and used to convert raw ticks into time units.
class NativeClock final {
public:
// Estimates the TSC frequency and precomputes the tick-to-time conversion factors.
explicit NativeClock();
// Returns the TSC frequency (ticks per second) estimated at construction.
u64 GetTscFrequency() const {
return rdtsc_frequency;
}
// Current TSC reading converted to nanoseconds.
u64 GetTimeNS() const;
// Current TSC reading converted to microseconds.
u64 GetTimeUS() const;
// Current TSC reading converted to milliseconds.
u64 GetTimeMS() const;
// Raw TSC reading (unscaled ticks).
u64 GetUptime() const;
// CPU time used by the current process, in microseconds.
u64 GetProcessTimeUS() const;
private:
// Estimated TSC ticks per second.
u64 rdtsc_frequency;
// Fixed-point factors: (units per second << 64) / rdtsc_frequency. Multiplying a tick
// count by a factor and keeping the high 64 bits yields the time in that unit.
u64 ns_rdtsc_factor;
u64 us_rdtsc_factor;
u64 ms_rdtsc_factor;
};
} // namespace Common

60
src/common/rdtsc.cpp Normal file
View File

@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
#include "common/rdtsc.h"
#include "common/uint128.h"
#ifdef _WIN64
#include <windows.h>
#endif
namespace Common {
// Number of nanoseconds in one second; scales timespec seconds in GetTimeNs().
static constexpr size_t SecondToNanoseconds = 1000000000ULL;
// Rounds `value` to the closest multiple of `Nearest`; exact halves round upwards.
template <u64 Nearest>
static u64 RoundToNearest(u64 value) {
    const u64 remainder = value % Nearest;
    const u64 rounded_down = value - remainder;
    if (remainder >= (Nearest / 2)) {
        return rounded_down + Nearest;
    }
    return rounded_down;
}
// Returns the current wall-clock (realtime) time in nanoseconds, using the
// platform-specific API selected at compile time.
static u64 GetTimeNs() {
#ifdef _WIN64
    // Convert Windows epoch to Unix epoch.
    static constexpr u64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL;
    // GetSystemTimePreciseAsFileTime returns the file time in 100ns units.
    static constexpr u64 Multiplier = 100;

    FILETIME now;
    GetSystemTimePreciseAsFileTime(&now);
    // Assemble the 64-bit file time from its two 32-bit halves.
    const u64 file_ticks = (static_cast<u64>(now.dwHighDateTime) << 32) + static_cast<u64>(now.dwLowDateTime);
    return Multiplier * (file_ticks - WindowsEpochToUnixEpoch);
#elif defined(__APPLE__)
    return clock_gettime_nsec_np(CLOCK_REALTIME);
#else
    timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    const u64 whole_seconds_ns = now.tv_sec * SecondToNanoseconds;
    return whole_seconds_ns + now.tv_nsec;
#endif
}
// Estimates the TSC frequency in ticks per second by comparing the TSC delta
// against the wall-clock delta across a 100 ms sleep. The result is rounded to
// the nearest 100'000.
u64 EstimateRDTSCFrequency() {
    using std::chrono::milliseconds;

    // Warm-up: the first rdtsc measurements are thrown away.
    FencedRDTSC();
    std::this_thread::sleep_for(milliseconds{1});
    FencedRDTSC();

    // Sample wall clock and TSC back to back at the start of the interval...
    const u64 wall_begin_ns = GetTimeNs();
    const u64 ticks_begin = FencedRDTSC();

    std::this_thread::sleep_for(milliseconds{100});

    // ...and again, in the same order, at the end.
    const u64 wall_end_ns = GetTimeNs();
    const u64 ticks_end = FencedRDTSC();

    const u64 elapsed_ns = wall_end_ns - wall_begin_ns;
    const u64 elapsed_ticks = ticks_end - ticks_begin;
    // ticks * (ns per second) / elapsed ns == ticks per second.
    const u64 raw_frequency = MultiplyAndDivide64(elapsed_ticks, SecondToNanoseconds, elapsed_ns);
    return RoundToNearest<100'000>(raw_frequency);
}
} // namespace Common

37
src/common/rdtsc.h Normal file
View File

@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include "common/types.h"
namespace Common {
#ifdef _MSC_VER
// MSVC variant: reads the time-stamp counter with __rdtsc(), issuing _mm_lfence()
// immediately before and after the read. Each fence is followed by _ReadWriteBarrier()
// (presumably to keep the compiler from reordering around the read — TODO confirm).
__forceinline static u64 FencedRDTSC() {
_mm_lfence();
_ReadWriteBarrier();
const u64 result = __rdtsc();
_mm_lfence();
_ReadWriteBarrier();
return result;
}
#else
// Inline-assembly variant for non-MSVC compilers: executes lfence, rdtsc, lfence
// and returns the 64-bit counter assembled from the two output registers.
static inline u64 FencedRDTSC() {
// Low half of the counter (register constraint "a").
u64 eax;
// High half of the counter (register constraint "d").
u64 edx;
asm volatile("lfence\n\t"
"rdtsc\n\t"
"lfence\n\t"
: "=a"(eax), "=d"(edx));
// Combine the high and low halves into a single value.
return (edx << 32) | eax;
}
#endif
u64 EstimateRDTSCFrequency();
} // namespace Common

View File

@ -1,108 +0,0 @@
#include "common/timer.h"
#ifdef _WIN64
#include <windows.h>
#endif
namespace Common {
// Caches the performance-counter frequency reported by QueryPerformanceFrequency.
Timer::Timer() {
#ifdef _WIN64
    LARGE_INTEGER frequency;
    QueryPerformanceFrequency(&frequency);
    m_Frequency = frequency.QuadPart;
#else
#error Unimplemented Timer constructor
#endif
}
// Records the current performance-counter value as the start time and marks
// the timer as running.
void Timer::Start() {
#ifdef _WIN64
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    m_StartTime = counter.QuadPart;
#else
#error Unimplemented Timer::Start()
#endif
    m_is_timer_paused = false;
}
// Records the current performance-counter value as the pause time and marks
// the timer as paused.
void Timer::Pause() {
#ifdef _WIN64
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    m_PauseTime = counter.QuadPart;
#else
#error Unimplemented Timer::Pause()
#endif
    m_is_timer_paused = true;
}
void Timer::Resume() {
u64 current_time = 0;
#ifdef _WIN64
LARGE_INTEGER c;
QueryPerformanceCounter(&c);
current_time = c.QuadPart;
#else
#error Unimplemented Timer::Resume()
#endif
m_StartTime += current_time - m_PauseTime;
m_is_timer_paused = false;
}
double Timer::GetTimeMsec() const {
if (m_is_timer_paused) {
return 1000.0 * (static_cast<double>(m_PauseTime - m_StartTime)) / static_cast<double>(m_Frequency);
}
u64 current_time = 0;
#ifdef _WIN64
LARGE_INTEGER c;
QueryPerformanceCounter(&c);
current_time = c.QuadPart;
#else
#error Unimplemented Timer::GetTimeMsec()
#endif
return 1000.0 * (static_cast<double>(current_time - m_StartTime)) / static_cast<double>(m_Frequency);
}
double Timer::GetTimeSec() const {
if (m_is_timer_paused) {
return (static_cast<double>(m_PauseTime - m_StartTime)) / static_cast<double>(m_Frequency);
}
u64 current_time = 0;
#ifdef _WIN64
LARGE_INTEGER c;
QueryPerformanceCounter(&c);
current_time = c.QuadPart;
#else
#error Unimplemented Timer::GetTimeSec()
#endif
return (static_cast<double>(current_time - m_StartTime)) / static_cast<double>(m_Frequency);
}
// Returns the elapsed time in raw performance-counter ticks. While paused, the
// elapsed ticks are measured up to the pause point.
u64 Timer::GetTicks() const {
    u64 end_ticks = m_PauseTime;
    if (!m_is_timer_paused) {
        u64 now = 0;
#ifdef _WIN64
        LARGE_INTEGER counter;
        QueryPerformanceCounter(&counter);
        now = counter.QuadPart;
#else
#error Unimplemented Timer::GetTicks()
#endif
        end_ticks = now;
    }
    return end_ticks - m_StartTime;
}
// Returns the current raw value of the performance counter.
u64 Timer::getQueryPerformanceCounter() {
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    const u64 ticks = counter.QuadPart;
    return ticks;
}
} // namespace Common

View File

@ -1,43 +0,0 @@
#pragma once
#include "common/types.h"
namespace Common {
// Pausable stopwatch based on the performance counter. Non-copyable and non-movable.
class Timer final {
public:
// Queries and stores the performance-counter frequency.
Timer();
~Timer() = default;
// Records the start time and marks the timer as running.
void Start();
// Records the pause time and marks the timer as paused.
void Pause();
// Moves the start time forward by the paused duration and marks the timer as running.
void Resume();
// True while the timer is paused (also the initial state before Start()).
bool IsPaused() const {
return m_is_timer_paused;
}
// Performance-counter frequency captured by the constructor.
u64 GetFrequency() const {
return m_Frequency;
}
// Elapsed time in milliseconds (frozen at the pause point while paused).
double GetTimeMsec() const;
// Elapsed time in seconds (frozen at the pause point while paused).
double GetTimeSec() const;
// Elapsed time in raw counter ticks (frozen at the pause point while paused).
u64 GetTicks() const;
// Current raw performance-counter value, independent of any Timer instance.
[[nodiscard]] static u64 getQueryPerformanceCounter();
public:
// Copying and moving are disabled.
Timer(const Timer&) = delete;
Timer& operator=(const Timer&) = delete;
Timer(Timer&&) = delete;
Timer& operator=(Timer&&) = delete;
private:
// Timer starts out paused until Start() is called.
bool m_is_timer_paused = true;
// Counter frequency set by the constructor.
u64 m_Frequency{};
// Counter value at Start(), adjusted by Resume().
u64 m_StartTime{};
// Counter value at the most recent Pause().
u64 m_PauseTime{};
};
} // namespace Common

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <array>
#include <cstdint> #include <cstdint>
using s08 = std::int8_t; using s08 = std::int8_t;
@ -15,9 +16,12 @@ using u64 = std::uint64_t;
using f32 = float; using f32 = float;
using f64 = double; using f64 = double;
using u128 = std::array<std::uint64_t, 2>;
static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
#define PS4_SYSV_ABI __attribute__((sysv_abi)) #define PS4_SYSV_ABI __attribute__((sysv_abi))
// UDLs for memory size values // UDLs for memory size values
constexpr u64 operator""_KB(u64 x) { return 1024ULL * x; } constexpr unsigned long long operator""_KB(unsigned long long x) { return 1024ULL * x; }
constexpr u64 operator""_MB(u64 x) { return 1024_KB * x; } constexpr unsigned long long operator""_MB(unsigned long long x) { return 1024_KB * x; }
constexpr u64 operator""_GB(u64 x) { return 1024_MB * x; } constexpr unsigned long long operator""_GB(unsigned long long x) { return 1024_MB * x; }

115
src/common/uint128.h Normal file
View File

@ -0,0 +1,115 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <utility>
#ifdef _MSC_VER
#ifndef __clang__
#define HAS_INTRINSICS
#include <intrin.h>
#pragma intrinsic(__umulh)
#pragma intrinsic(_umul128)
#pragma intrinsic(_udiv128)
#else
#endif
#else
#include <cstring>
#endif
#include "common/types.h"
namespace Common {
// Computes (a * b) / d using a full 128-bit intermediate product where the
// toolchain supports it, so the multiplication cannot overflow before the
// division. `d` must be non-zero. The quotient is returned as a u64.
[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
#ifdef HAS_INTRINSICS
    u128 r{};
    r[0] = _umul128(a, b, &r[1]);
    u64 remainder;
    return _udiv128(r[1], r[0], d, &remainder);
#elif defined(__SIZEOF_INT128__)
    // Exact for every input: the previous decomposition overflowed in
    // `moda * modb` whenever d exceeded 2^32.
    const unsigned __int128 product = static_cast<unsigned __int128>(a) * b;
    return static_cast<u64>(product / d);
#else
    // Portable fallback without a 128-bit type. Only exact while
    // (a % d) * (b % d) fits in 64 bits.
    const u64 diva = a / d;
    const u64 moda = a % d;
    const u64 divb = b / d;
    const u64 modb = b % d;
    return diva * b + moda * divb + moda * modb / d;
#endif
}
// Multiplies two u64 values and returns the full 128-bit product as a u128
// (element 0 holds the low 64 bits on the intrinsic path).
[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) {
#ifdef HAS_INTRINSICS
    u128 product;
    product[0] = _umul128(a, b, &product[1]);
    return product;
#else
    const unsigned __int128 wide = static_cast<unsigned __int128>(a) * b;
    u128 product;
    // Copy the raw bytes of the native 128-bit value into the two-element array.
    std::memcpy(&product, &wide, sizeof(u128));
    return product;
#endif
}
// Returns (numerator << 64) / divisor truncated to 64 bits, i.e. the ratio
// numerator/divisor expressed as a fixed-point value with 64 fractional bits.
[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
#ifdef __SIZEOF_INT128__
    const unsigned __int128 scaled = static_cast<unsigned __int128>(numerator) << 64ULL;
    const unsigned __int128 quotient = scaled / divisor;
    return static_cast<u64>(quotient);
#elif defined(_M_X64) || defined(_M_ARM64)
    // 128-bit dividend with `numerator` in the high half and zero in the low half.
    std::array<u64, 2> dividend = {0, numerator};
    u64 unused_remainder;
    return _udiv128(dividend[1], dividend[0], divisor, &unused_remainder);
#else
    // This one is bit more inaccurate.
    constexpr u64 all_ones = std::numeric_limits<u64>::max();
    return MultiplyAndDivide64(all_ones, numerator, divisor);
#endif
}
// Returns the upper 64 bits of the 128-bit product a * b.
[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) {
#ifdef __SIZEOF_INT128__
    const unsigned __int128 product = static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b);
    return product >> 64;
#elif defined(_M_X64) || defined(_M_ARM64)
    return __umulh(a, b); // MSVC
#else
    // Generic fallback: schoolbook multiplication on 32-bit halves.
    const u64 lhs_low = static_cast<u32>(a);
    const u64 lhs_high = a >> 32;
    const u64 rhs_low = static_cast<u32>(b);
    const u64 rhs_high = b >> 32;

    const u64 high_high = lhs_high * rhs_high;
    const u64 high_low = lhs_high * rhs_low;
    const u64 low_high = rhs_high * lhs_low;
    const u64 low_low = lhs_low * rhs_low;

    // Carry out of bit 63: sum the low 32 bits of both cross terms with the
    // upper 32 bits of the low product, and keep what spills past 32 bits.
    const u64 middle_sum = static_cast<u64>(static_cast<u32>(high_low)) +
                           static_cast<u64>(static_cast<u32>(low_high)) + (low_low >> 32);
    const u64 carry = middle_sum >> 32;

    return high_high + (high_low >> 32) + (low_high >> 32) + carry;
#endif
}
// Divides a u128 (element 0 = low 64 bits, element 1 = high 64 bits) by a u32
// and returns {quotient, remainder} as two u64 values.
[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
    const u64 low_half = dividend[0];
    const u64 high_half = dividend[1];

    u64 quotient = low_half / divisor;
    u64 rest = low_half % divisor;

    if (high_half != 0) {
        // high_half / divisor is deliberately dropped: it would overflow the 64-bit quotient.
        // The remaining high part is folded in 32 bits at a time.
        const u64 upper_segment = (high_half % divisor) << 32;
        quotient += (upper_segment / divisor) << 32;

        const u64 lower_segment = (upper_segment % divisor) << 32;
        quotient += (lower_segment / divisor);

        // Merge the two partial remainders, carrying into the quotient if needed.
        rest += lower_segment % divisor;
        if (rest >= divisor) {
            ++quotient;
            rest -= divisor;
        }
    }
    return {quotient, rest};
}
} // namespace Common

View File

@ -1,5 +1,5 @@
#include "gpu_memory.h" #include "gpu_memory.h"
#include <atomic>
#include <xxh3.h> #include <xxh3.h>
#include "common/singleton.h" #include "common/singleton.h"

View File

@ -2,8 +2,9 @@
#include "common/types.h" #include "common/types.h"
#include <vector> #include <vector>
#include <vulkan/vulkan_core.h> #include <vulkan/vulkan.h>
#include <mutex> #include <mutex>
#include <memory>
namespace HLE::Libs::Graphics { namespace HLE::Libs::Graphics {

View File

@ -1,4 +1,6 @@
#pragma once #pragma once
#include <atomic>
#include <mutex> #include <mutex>
#include <string> #include <string>
#include <vector> #include <vector>
@ -52,4 +54,4 @@ class HandleTable {
std::mutex m_mutex; std::mutex m_mutex;
}; };
} // namespace Core::FileSys } // namespace Core::FileSys

View File

@ -1,6 +1,5 @@
#include "common/debug.h" #include "common/debug.h"
#include "common/timer.h" #include "core/hle/kernel/Objects/event_queue.h"
#include "core/hle/kernel/objects/event_queue.h"
namespace Core::Kernel { namespace Core::Kernel {
@ -24,28 +23,19 @@ int EqueueInternal::addEvent(const EqueueEvent& event) {
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) { int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
std::unique_lock lock{m_mutex}; std::unique_lock lock{m_mutex};
int ret = 0;
u32 timeElapsed = 0; const auto predicate = [&] {
Common::Timer t; ret = getTriggeredEvents(ev, num);
t.Start(); return ret > 0;
};
for (;;) { if (micros == 0) {
int ret = getTriggeredEvents(ev, num); m_cond.wait(lock, predicate);
} else {
if (ret > 0 || (timeElapsed >= micros && micros != 0)) { m_cond.wait_for(lock, std::chrono::microseconds(micros), predicate);
return ret;
}
if (micros == 0) {
m_cond.wait(lock);
} else {
m_cond.wait_for(lock, std::chrono::microseconds(micros - timeElapsed));
}
timeElapsed = static_cast<uint32_t>(t.GetTimeSec() * 1000000.0);
} }
return ret;
return 0;
} }
bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) { bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) {

View File

@ -3,6 +3,7 @@
#include <mutex> #include <mutex>
#include <string> #include <string>
#include <vector> #include <vector>
#include <condition_variable>
#include "common/types.h" #include "common/types.h"
namespace Core::Kernel { namespace Core::Kernel {

View File

@ -1,4 +1,4 @@
#include "core/hle/kernel/objects/physical_memory.h" #include "core/hle/kernel/Objects/physical_memory.h"
namespace Core::Kernel { namespace Core::Kernel {

View File

@ -1,6 +1,6 @@
#pragma once #pragma once
#include "core/hle/kernel/objects/event_queue.h" #include "core/hle/kernel/Objects/event_queue.h"
namespace Core::Kernel { namespace Core::Kernel {

View File

@ -4,35 +4,35 @@
namespace Core::Libraries::LibC { namespace Core::Libraries::LibC {
float PS4_SYSV_ABI ps4_atan2f(float y, float x) { float PS4_SYSV_ABI ps4_atan2f(float y, float x) {
return std::atan2f(y, x); return atan2f(y, x);
} }
float PS4_SYSV_ABI ps4_acosf(float num) { float PS4_SYSV_ABI ps4_acosf(float num) {
return std::acosf(num); return acosf(num);
} }
float PS4_SYSV_ABI ps4_tanf(float num) { float PS4_SYSV_ABI ps4_tanf(float num) {
return std::tanf(num); return tanf(num);
} }
float PS4_SYSV_ABI ps4_asinf(float num) { float PS4_SYSV_ABI ps4_asinf(float num) {
return std::asinf(num); return asinf(num);
} }
double PS4_SYSV_ABI ps4_pow(double base, double exponent) { double PS4_SYSV_ABI ps4_pow(double base, double exponent) {
return std::pow(base, exponent); return pow(base, exponent);
} }
double PS4_SYSV_ABI ps4__Sin(double x) { double PS4_SYSV_ABI ps4__Sin(double x) {
return std::sin(x); return sin(x);
} }
float PS4_SYSV_ABI ps4__Fsin(float arg) { float PS4_SYSV_ABI ps4__Fsin(float arg) {
return std::sinf(arg); return sinf(arg);
} }
double PS4_SYSV_ABI ps4_exp2(double arg) { double PS4_SYSV_ABI ps4_exp2(double arg) {
return std::exp2(arg); return exp2(arg);
} }
} // namespace Core::Libraries::LibC } // namespace Core::Libraries::LibC

View File

@ -12,7 +12,7 @@ int PS4_SYSV_ABI ps4_printf(VA_ARGS) {
} }
int PS4_SYSV_ABI ps4_fprintf(FILE* file, VA_ARGS) { int PS4_SYSV_ABI ps4_fprintf(FILE* file, VA_ARGS) {
int fd = _fileno(file); int fd = fileno(file);
if (fd == 1 || fd == 2) { // output stdout and stderr to console if (fd == 1 || fd == 2) { // output stdout and stderr to console
VA_CTX(ctx); VA_CTX(ctx);
return printf_ctx(&ctx); return printf_ctx(&ctx);

View File

@ -15,6 +15,8 @@
#ifdef _WIN64 #ifdef _WIN64
#include <windows.h> #include <windows.h>
#include <io.h> #include <io.h>
#else
#include <sys/mman.h>
#endif #endif
#include "thread_management.h" #include "thread_management.h"
@ -56,6 +58,7 @@ int* PS4_SYSV_ABI __Error() { return &libc_error; }
#define PROT_WRITE 0x2 #define PROT_WRITE 0x2
int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, off_t offset, void** res) { int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, off_t offset, void** res) {
#ifdef _WIN64
PRINT_FUNCTION_NAME(); PRINT_FUNCTION_NAME();
if (prot > 3) // READ,WRITE or bitwise READ | WRITE supported if (prot > 3) // READ,WRITE or bitwise READ | WRITE supported
{ {
@ -86,6 +89,14 @@ int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd,
} }
*res = ret; *res = ret;
return 0; return 0;
#else
void* result = mmap(addr, len, prot, flags, fd, offset);
if (result != MAP_FAILED) {
*res = result;
return 0;
}
std::abort();
#endif
} }
PS4_SYSV_ABI void* posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) { PS4_SYSV_ABI void* posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) {

View File

@ -1,27 +1,31 @@
#include "common/timer.h" #include "common/native_clock.h"
#include "core/hle/libraries/libkernel/time_management.h" #include "core/hle/libraries/libkernel/time_management.h"
#include "core/hle/libraries/libs.h" #include "core/hle/libraries/libs.h"
#include "emuTimer.h"
namespace Core::Libraries::LibKernel { namespace Core::Libraries::LibKernel {
static u64 initial_ptc;
static std::unique_ptr<Common::NativeClock> clock;
u64 PS4_SYSV_ABI sceKernelGetProcessTime() { u64 PS4_SYSV_ABI sceKernelGetProcessTime() {
return static_cast<u64>(Emulator::emuTimer::getTimeMsec() * 1000.0); // return time in microseconds return clock->GetProcessTimeUS();
} }
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter() { u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter() {
return Emulator::emuTimer::getTimeCounter(); return clock->GetUptime() - initial_ptc;
} }
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency() { u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency() {
return Emulator::emuTimer::getTimeFrequency(); return clock->GetTscFrequency();
} }
u64 PS4_SYSV_ABI sceKernelReadTsc() { u64 PS4_SYSV_ABI sceKernelReadTsc() {
return Common::Timer::getQueryPerformanceCounter(); return clock->GetUptime();
} }
void timeSymbolsRegister(Loader::SymbolsResolver* sym) { void timeSymbolsRegister(Loader::SymbolsResolver* sym) {
clock = std::make_unique<Common::NativeClock>();
initial_ptc = clock->GetUptime();
LIB_FUNCTION("4J2sUJmuHZQ", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTime); LIB_FUNCTION("4J2sUJmuHZQ", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTime);
LIB_FUNCTION("fgxnMeTNUtY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounter); LIB_FUNCTION("fgxnMeTNUtY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounter);
LIB_FUNCTION("BNowx2l588E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounterFrequency); LIB_FUNCTION("BNowx2l588E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounterFrequency);

View File

@ -658,12 +658,12 @@ void Linker::Resolve(const std::string& name, int Symtype, Module* m, Loader::Sy
} }
else else
{ {
__debugbreak();//den tha prepei na ftasoume edo //__debugbreak();//den tha prepei na ftasoume edo
} }
} }
else else
{ {
__debugbreak();//oute edo mallon //__debugbreak();//oute edo mallon
} }
} }

View File

@ -5,7 +5,7 @@
namespace Core::Loader { namespace Core::Loader {
constexpr bool log_file_loader = true; // disable it to disable logging constexpr bool log_file_loader = false; // disable it to disable logging
static std::string_view getProgramTypeName(program_type_es type) { static std::string_view getProgramTypeName(program_type_es type) {
switch (type) { switch (type) {

View File

@ -79,7 +79,8 @@ bool memory_protect(u64 address, u64 size, MemoryMode mode, MemoryMode* old_mode
} }
return true; return true;
#else #else
#error Unimplement memory_protect function int ret = mprotect(reinterpret_cast<void*>(address), size, convertMemoryMode(mode));
return true;
#endif #endif
} }
@ -117,6 +118,7 @@ bool memory_patch(u64 vaddr, u64 value) {
static u64 AlignUp(u64 pos, u64 align) { return (align != 0 ? (pos + (align - 1)) & ~(align - 1) : pos); } static u64 AlignUp(u64 pos, u64 align) { return (align != 0 ? (pos + (align - 1)) & ~(align - 1) : pos); }
u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment) { u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment) {
#ifdef _WIN64
// try allocate aligned address inside user area // try allocate aligned address inside user area
MEM_ADDRESS_REQUIREMENTS req{}; MEM_ADDRESS_REQUIREMENTS req{};
MEM_EXTENDED_PARAMETER param{}; MEM_EXTENDED_PARAMETER param{};
@ -134,5 +136,13 @@ u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment)
LOG_ERROR_IF(true, "VirtualAlloc2() failed: 0x{:X}\n", err); LOG_ERROR_IF(true, "VirtualAlloc2() failed: 0x{:X}\n", err);
} }
return ptr; return ptr;
#else
void* hint_address = reinterpret_cast<void*>(AlignUp(address, alignment));
void* ptr = mmap(hint_address, size, convertMemoryMode(mode), MAP_ANON | MAP_PRIVATE, -1, 0);
if (ptr == MAP_FAILED) {
std::abort();
}
return reinterpret_cast<u64>(ptr);
#endif
} }
} // namespace VirtualMemory } // namespace VirtualMemory

View File

@ -1,23 +0,0 @@
#include "common/timer.h"
namespace Emulator::emuTimer {

namespace {
// Single timer instance shared by every function in this namespace.
Common::Timer timer;
} // namespace

// Starts the shared timer.
void start() {
    timer.Start();
}

// Elapsed time of the shared timer, in milliseconds.
double getTimeMsec() {
    const double elapsed_ms = timer.GetTimeMsec();
    return elapsed_ms;
}

// Elapsed time of the shared timer, in raw counter ticks.
u64 getTimeCounter() {
    const u64 elapsed_ticks = timer.GetTicks();
    return elapsed_ticks;
}

// Counter frequency of the shared timer.
u64 getTimeFrequency() {
    const u64 frequency = timer.GetFrequency();
    return frequency;
}

} // namespace Emulator::emuTimer

View File

@ -1,10 +0,0 @@
#pragma once
#include "common/types.h"
// Free-function interface to a single emulator-wide timer.
namespace Emulator::emuTimer {
// Starts the timer.
void start();
// Elapsed time in milliseconds.
double getTimeMsec();
// Elapsed time in raw counter ticks.
u64 getTimeCounter();
// Frequency of the underlying counter.
u64 getTimeFrequency();
} // namespace Emulator::emuTimer

View File

@ -1,6 +1,5 @@
#include <fmt/core.h> #include <fmt/core.h>
#include <vulkan_util.h> #include <vulkan_util.h>
#include "common/timer.h"
#include "common/singleton.h" #include "common/singleton.h"
#include "common/version.h" #include "common/version.h"
#include "emulator.h" #include "emulator.h"
@ -90,8 +89,6 @@ static void calculateFps(double game_time_s) {
} }
} }
void emuRun() { void emuRun() {
Common::Timer timer;
timer.Start();
auto window_ctx = Common::Singleton<Emu::WindowCtx>::Instance(); auto window_ctx = Common::Singleton<Emu::WindowCtx>::Instance();
{ {
// init window and wait until init finishes // init window and wait until init finishes
@ -100,7 +97,7 @@ void emuRun() {
Graphics::Vulkan::vulkanCreate(window_ctx); Graphics::Vulkan::vulkanCreate(window_ctx);
window_ctx->m_is_graphic_initialized = true; window_ctx->m_is_graphic_initialized = true;
window_ctx->m_graphic_initialized_cond.notify_one(); window_ctx->m_graphic_initialized_cond.notify_one();
calculateFps(timer.GetTimeSec()); calculateFps(0); // TODO: Proper fps
} }
bool exit_loop = false; bool exit_loop = false;
@ -138,10 +135,6 @@ void emuRun() {
continue; continue;
} }
if (m_game_is_paused) { if (m_game_is_paused) {
if (!timer.IsPaused()) {
timer.Pause();
}
SDL_WaitEvent(&event); SDL_WaitEvent(&event);
switch (event.type) { switch (event.type) {
@ -171,21 +164,13 @@ void emuRun() {
continue; continue;
} }
exit_loop = m_emu_needs_exit; exit_loop = m_emu_needs_exit;
if (m_game_is_paused) { if (!m_game_is_paused) {
if (!timer.IsPaused()) {
timer.Pause();
}
} else {
if (timer.IsPaused()) {
timer.Resume();
}
if (!exit_loop) { if (!exit_loop) {
update(); update();
} }
if (!exit_loop) { if (!exit_loop) {
if (HLE::Libs::Graphics::VideoOut::videoOutFlip(100000)) { // flip every 0.1 sec if (HLE::Libs::Graphics::VideoOut::videoOutFlip(100000)) { // flip every 0.1 sec
calculateFps(timer.GetTimeSec()); calculateFps(0); // TODO: Proper fps
} }
} }
} }
@ -215,7 +200,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
window_ctx->swapchain.current_index = static_cast<u32>(-1); window_ctx->swapchain.current_index = static_cast<u32>(-1);
auto result = vkAcquireNextImageKHR(window_ctx->m_graphic_ctx.m_device, window_ctx->swapchain.swapchain, UINT64_MAX, nullptr, auto result = vkAcquireNextImageKHR(window_ctx->m_graphic_ctx.m_device, window_ctx->swapchain.swapchain, UINT64_MAX, nullptr,
window_ctx->swapchain.present_complete_fence, &window_ctx->swapchain.current_index); VK_NULL_HANDLE, &window_ctx->swapchain.current_index);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
fmt::print("Can't aquireNextImage\n"); fmt::print("Can't aquireNextImage\n");
@ -226,16 +211,6 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
std::exit(0); std::exit(0);
} }
do {
result = vkWaitForFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence, VK_TRUE, 100000000);
} while (result == VK_TIMEOUT);
if (result != VK_SUCCESS) {
fmt::print("vkWaitForFences is not success\n");
std::exit(0);
}
vkResetFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence);
auto blt_src_image = image; auto blt_src_image = image;
auto blt_dst_image = window_ctx->swapchain; auto blt_dst_image = window_ctx->swapchain;
@ -272,6 +247,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
buffer.end(); buffer.end();
buffer.executeWithSemaphore(); buffer.executeWithSemaphore();
buffer.waitForFence(); // HACK: The whole vulkan backend needs a rewrite
VkPresentInfoKHR present{}; VkPresentInfoKHR present{};
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;

View File

@ -14,7 +14,6 @@
#include "core/PS4/HLE/Graphics/video_out.h" #include "core/PS4/HLE/Graphics/video_out.h"
#include "core/hle/libraries/libs.h" #include "core/hle/libraries/libs.h"
#include "core/linker.h" #include "core/linker.h"
#include "emuTimer.h"
#include "emulator.h" #include "emulator.h"
#include <core/hle/libraries/libkernel/thread_management.h> #include <core/hle/libraries/libkernel/thread_management.h>
#include "core/file_sys/fs.h" #include "core/file_sys/fs.h"
@ -31,7 +30,6 @@ int main(int argc, char* argv[]) {
auto height = Config::getScreenHeight(); auto height = Config::getScreenHeight();
Emu::emuInit(width, height); Emu::emuInit(width, height);
HLE::Libs::Graphics::VideoOut::videoOutInit(width, height); HLE::Libs::Graphics::VideoOut::videoOutInit(width, height);
Emulator::emuTimer::start();
// Argument 1 is the path of self file to boot // Argument 1 is the path of self file to boot
const char* const path = argv[1]; const char* const path = argv[1];

View File

@ -0,0 +1,188 @@
#include "gpu_memory.h"
#include <atomic>
#include <xxh3.h>
#include "common/singleton.h"
// Convenience wrapper that forwards a single (address, size) pair to the
// GPUMemory singleton. The `todo` argument is currently not forwarded; nullptr
// is passed in its place.
void* GPU::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo /*CommandBuffer?*/, u64 virtual_addr, u64 size,
                           const GPUObject& info) {
    return Common::Singleton<GPUMemory>::Instance()->memoryCreateObj(submit_id, ctx, nullptr, &virtual_addr, &size, 1, info);
}
// Registers a new heap covering [virtual_addr, virtual_addr + size) with the
// shared GPUMemory instance. The heap starts without any objects.
void GPU::memorySetAllocArea(u64 virtual_addr, u64 size) {
    auto* const memory = Common::Singleton<GPUMemory>::Instance();
    std::scoped_lock guard{memory->m_mutex};

    // Append an empty heap and fill in its address range in place.
    MemoryHeap& heap = memory->m_heaps.emplace_back();
    heap.allocated_virtual_addr = virtual_addr;
    heap.allocated_size = size;
}
// Returns the 64-bit XXH3 hash of `size` bytes starting at `buf`.
// A null buffer or a zero size yields 0 instead of calling the hasher.
u64 GPU::calculate_hash(const u08* buf, u64 size) {
    if (buf == nullptr || size == 0) {
        return 0;
    }
    return XXH3_64bits(buf, size);
}
// Allocates Vulkan device memory for `mem`.
//
// Picks the first memory type of ctx->m_physical_device that is permitted by
// mem->requirements.memoryTypeBits and has every flag in mem->property set,
// then allocates mem->requirements.size bytes from ctx->m_device.
//
// On a successful search the function fills mem->type, resets mem->offset to 0
// and assigns a fresh process-wide mem->unique_id before allocating; the
// allocation handle is written to mem->memory.
//
// Returns true when vkAllocateMemory reports VK_SUCCESS. Returns false when the
// allocation fails or when no memory type satisfies the request; in the latter
// case `mem` is left untouched and no Vulkan allocation is attempted.
bool GPU::vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem) {
    static std::atomic_uint64_t unique_id = 0;

    VkPhysicalDeviceMemoryProperties memory_properties{};
    vkGetPhysicalDeviceMemoryProperties(ctx->m_physical_device, &memory_properties);

    u32 index = 0;
    for (; index < memory_properties.memoryTypeCount; index++) {
        const bool type_allowed = (mem->requirements.memoryTypeBits & (static_cast<uint32_t>(1) << index)) != 0;
        const bool has_properties = (memory_properties.memoryTypes[index].propertyFlags & mem->property) == mem->property;
        if (type_allowed && has_properties) {
            break;
        }
    }

    // No suitable type: `index` equals memoryTypeCount here, which is not a
    // valid memoryTypeIndex, so bail out instead of handing it to Vulkan.
    if (index == memory_properties.memoryTypeCount) {
        return false;
    }

    mem->type = index;
    mem->offset = 0;

    VkMemoryAllocateInfo alloc_info{};
    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    alloc_info.pNext = nullptr;
    alloc_info.allocationSize = mem->requirements.size;
    alloc_info.memoryTypeIndex = index;

    mem->unique_id = ++unique_id;

    const auto result = vkAllocateMemory(ctx->m_device, &alloc_info, nullptr, &mem->memory);
    return result == VK_SUCCESS;
}
// Forwards to flushAllHeaps() on the shared GPUMemory instance, which
// re-checks every live object in every registered heap.
void GPU::flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx) {
    Common::Singleton<GPUMemory>::Instance()->flushAllHeaps(ctx);
}
// Finds the heap that a guest address range touches.
//
// A heap matches when either the first byte (virtual_addr) or the last byte
// (virtual_addr + size - 1) of the range lies inside the heap's allocated
// area. Heaps are tested in registration order and the first match wins.
//
// Returns the index of the matching heap in m_heaps, or -1 if none matches.
// The caller is expected to hold m_mutex (this function does not lock).
int GPU::GPUMemory::getHeapId(u64 virtual_addr, u64 size) {
    const u64 first_byte = virtual_addr;
    const u64 last_byte = virtual_addr + size - 1;

    // True when `addr` falls inside the half-open allocated range of `heap`.
    const auto contains = [](const auto& heap, u64 addr) {
        return addr >= heap.allocated_virtual_addr && addr < heap.allocated_virtual_addr + heap.allocated_size;
    };

    int heap_id = 0;
    for (const auto& candidate : m_heaps) {
        if (contains(candidate, first_byte) || contains(candidate, last_byte)) {
            return heap_id;
        }
        ++heap_id;
    }
    return -1;
}
// Creates a GPU object backed by one or more guest address ranges and records
// it in the heap that contains the first range.
//
// submit_id        - stored with the object; update() compares against it.
// ctx              - passed through to the object's create function.
// todo             - unused here.
// virtual_addr     - array of virtual_addr_num range start addresses.
// size             - array of virtual_addr_num range sizes.
// virtual_addr_num - number of ranges.
// info             - supplies the parameters, object type, hash policy and
//                    the create/update callbacks.
//
// Returns the pointer produced by info's create function, or nullptr when the
// first range does not belong to any registered heap.
//
// NOTE(review): the per-range hash storage appears to hold three entries (see
// the fixed-size array used by update()); virtual_addr_num is not checked
// against that here - confirm callers never pass more.
void* GPU::GPUMemory::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo, const u64* virtual_addr, const u64* size,
                                      int virtual_addr_num, const GPUObject& info) {
    auto* const memory = Common::Singleton<GPUMemory>::Instance();
    std::scoped_lock guard{memory->m_mutex};

    // Only the first range decides which heap owns the object.
    const int heap_id = memory->getHeapId(virtual_addr[0], size[0]);
    if (heap_id < 0) {
        return nullptr;
    }

    ObjInfo record = {};

    // Snapshot the creation parameters supplied by the caller.
    for (int p = 0; p < 8; ++p) {
        record.obj_params[p] = info.obj_params[p];
    }
    record.gpu_object.objectType = info.objectType;
    record.gpu_object.obj = nullptr;

    // Remember the initial content hash of every range (0 when hashing is off).
    for (int r = 0; r < virtual_addr_num; ++r) {
        record.hash[r] = info.check_hash ? GPU::calculate_hash(reinterpret_cast<const u08*>(virtual_addr[r]), size[r]) : 0;
    }

    record.submit_id = submit_id;
    record.check_hash = info.check_hash;

    // Build the backing object, then keep the callback used to refresh it.
    record.gpu_object.obj = info.getCreateFunc()(ctx, record.obj_params, virtual_addr, size, virtual_addr_num, &record.mem);
    record.update_func = info.getUpdateFunc();

    auto& owner = m_heaps[heap_id];
    const int slot = static_cast<int>(owner.objects.size());

    HeapObject entry{};
    entry.block = createHeapBlock(virtual_addr, size, virtual_addr_num, heap_id, slot);
    entry.info = record;
    entry.free = false;
    owner.objects.push_back(entry);

    return record.gpu_object.obj;
}
// Builds a HeapBlock describing the given address ranges.
//
// virtual_addr / size - parallel arrays with virtual_addr_num entries each.
// virtual_addr_num    - number of ranges to copy into the block.
// heap_id, obj_id     - currently unused; kept so callers do not change.
//
// Returns the populated block by value. No member state is read or modified.
GPU::HeapBlock GPU::GPUMemory::createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id) {
    GPU::HeapBlock heapBlock{};
    heapBlock.virtual_addr_num = virtual_addr_num;
    for (int vi = 0; vi < virtual_addr_num; vi++) {
        heapBlock.virtual_addr[vi] = virtual_addr[vi];
        heapBlock.size[vi] = size[vi];
    }
    return heapBlock;
}
// Re-validates one heap object and refreshes it when its backing memory changed.
//
// Nothing happens unless submit_id is greater than the submit id stored with
// the object. Otherwise every range of the object is re-hashed (hashes are 0
// when the object has hashing disabled), the stored hashes are brought up to
// date, and - if any hash differed - the object's update callback is invoked.
// The stored submit id is advanced to submit_id unless submit_id is UINT64_MAX.
//
// The function does not lock; flushAllHeaps() calls it with m_mutex held.
void GPU::GPUMemory::update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id) {
    auto& entry = m_heaps[heap_id].objects[obj_id];
    auto& record = entry.info;
    auto& block = entry.block;

    // Already processed for this (or a later) submission.
    if (submit_id <= record.submit_id) {
        return;
    }

    // Pass 1: hash the current contents of every range.
    uint64_t fresh[3] = {};
    for (int i = 0; i < block.virtual_addr_num; i++) {
        fresh[i] = record.check_hash ? GPU::calculate_hash(reinterpret_cast<const uint8_t*>(block.virtual_addr[i]), block.size[i]) : 0;
    }

    // Pass 2: compare against the stored hashes and remember the new ones.
    bool changed = false;
    for (int i = 0; i < block.virtual_addr_num; i++) {
        if (record.hash[i] != fresh[i]) {
            record.hash[i] = fresh[i];
            changed = true;
        }
    }

    // UINT64_MAX is passed by flushAllHeaps() and must not be recorded.
    if (submit_id != UINT64_MAX) {
        record.submit_id = submit_id;
    }

    if (!changed) {
        return;
    }
    record.update_func(ctx, record.obj_params, record.gpu_object.obj, block.virtual_addr, block.size, block.virtual_addr_num);
}
// Runs update() on every object that is not marked free, across all heaps,
// while holding m_mutex. UINT64_MAX is used as the submit id, so update()
// re-checks the objects without recording a new submit id.
void GPU::GPUMemory::flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx) {
    std::scoped_lock guard{m_mutex};

    const int heap_count = static_cast<int>(m_heaps.size());
    for (int heap_id = 0; heap_id < heap_count; ++heap_id) {
        const int object_count = static_cast<int>(m_heaps[heap_id].objects.size());
        for (int obj_id = 0; obj_id < object_count; ++obj_id) {
            if (m_heaps[heap_id].objects[obj_id].free) {
                continue; // Slot is unused; nothing to refresh.
            }
            update(UINT64_MAX, ctx, heap_id, obj_id);
        }
    }
}

View File

@ -0,0 +1,86 @@
#pragma once
#include "common/types.h"
#include <mutex>
#include <vector>
namespace VideoCore {
class GPUObject;
// Memory access mode. The enumerator values combine like bit flags:
// ReadWrite (3) is Read (1) | Write (2), and NoAccess is 0.
enum class MemoryMode : u32 {
NoAccess = 0,
Read = 1,
Write = 2,
ReadWrite = 3,
};
// Kind of object tracked by the GPU memory bookkeeping.
// Invalid is the value the structs in this header default to.
enum class MemoryObjectType : u64 {
Invalid,
VideoOutBuffer,
};
// Type tag plus an untyped pointer to the created object.
// Defaults to an Invalid type and a null pointer.
// NOTE(review): presumably object_type tells the consumer how to interpret obj - confirm.
struct GpuMemoryObject {
MemoryObjectType object_type = MemoryObjectType::Invalid;
void* obj = nullptr;
};
// Address ranges belonging to one heap object: parallel arrays of start
// addresses and sizes with room for three entries each.
// NOTE(review): std::array is used here but <array> is not among the includes
// visible at the top of this header - confirm it is pulled in by common/types.h.
struct HeapBlock {
std::array<u64, 3> virtual_address{};
std::array<u64, 3> size{};
// Presumably the number of leading entries of the arrays that are in use.
u32 virtual_addr_num = 0;
};
// Description of an object to be created in GPU memory. The virtual destructor
// allows derived descriptions to be destroyed through a GPUObject pointer.
class GPUObject {
public:
GPUObject() = default;
virtual ~GPUObject() = default;
// Eight opaque 64-bit creation parameters; meaning is defined by the object kind.
u64 obj_params[8] = {};
// Presumably enables content hashing of the backing memory - confirm with the implementation.
bool check_hash = false;
bool isReadOnly = false;
MemoryObjectType objectType = MemoryObjectType::Invalid;
};
// Bookkeeping stored for each created object: a copy of its eight creation
// parameters, the created object handle, one hash slot per address range
// (three slots), the submit id it was last associated with, and whether
// hashing is enabled. All members are zero/false/Invalid by default.
struct ObjInfo {
std::array<u64, 8> obj_params{};
GpuMemoryObject gpu_object;
std::array<u64, 3> hash{};
u64 submit_id = 0;
bool check_hash = false;
};
// One slot in a heap's object list: the address ranges it covers, its
// bookkeeping record, and a flag marking the slot as unused.
// A default-constructed slot is free.
struct HeapObject {
HeapBlock block;
ObjInfo info;
bool free = true;
};
// A registered address area (start address and size) together with the
// objects that have been created inside it.
struct MemoryHeap {
u64 allocated_virtual_addr = 0;
u64 allocated_size = 0;
std::vector<HeapObject> objects;
};
// Owner of all registered heaps and the objects created in them.
// State is a list of heaps guarded by a mutex; both are private.
class GPUMemory {
public:
GPUMemory() {}
virtual ~GPUMemory() {}
// Returns an int heap id for the range (vaddr, size).
// NOTE(review): presumably an index into m_heaps with a negative value for "not found" - confirm in the .cpp.
int getHeapId(u64 vaddr, u64 size);
// Creates an object spanning virtual_addr_num ranges given as parallel arrays
// (virtual_addr / size) and returns an untyped pointer to it.
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, const u64* virtual_addr,
const u64* size, int virtual_addr_num, const GPUObject& info);
// Builds the HeapBlock describing the given ranges.
HeapBlock createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id);
// Refreshes the object identified by (heap_id, obj_id) for the given submit id.
void update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id);
// Refreshes objects across all heaps.
void flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx);
private:
std::mutex m_mutex;
std::vector<MemoryHeap> m_heaps;
};
// Free-function entry points of the GPU memory module.

// Registers the address area [virtual_addr, virtual_addr + size).
// NOTE(review): presumably adds a MemoryHeap to the shared GPUMemory - confirm in the .cpp.
void memorySetAllocArea(u64 virtual_addr, u64 size);
// Single-range variant of GPUMemory::memoryCreateObj; returns an untyped pointer to the object.
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, u64 virtual_addr, u64 size,
const GPUObject& info);
// Returns a 64-bit hash of `size` bytes starting at `buf`.
u64 calculate_hash(const u08* buf, u64 size);
// Allocates Vulkan memory described by `mem`; the bool result reports success.
bool vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem);
// NOTE(review): presumably flushes all tracked heaps via GPUMemory::flushAllHeaps - confirm in the .cpp.
void flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx);
} // namespace VideoCore

View File

@ -0,0 +1,151 @@
#include <bit>
#include <cstring>
#include "video_core/tile_manager.h"
namespace VideoCore {
// Address calculator for a tiled surface with 32 bits per element.
//
// The constructor derives the tiling parameters from the surface size and the
// `is_neo` flag; GetTiledOffset() then maps an (x, y) element coordinate to the
// byte offset of that element inside the tiled buffer.
class TileManager32 {
public:
    u32 m_macro_tile_height = 0; // 64, or 128 when is_neo.
    u32 m_bank_height = 0;       // 1, or 2 when is_neo.
    u32 m_num_banks = 0;         // 16, or 8 when is_neo.
    u32 m_num_pipes = 0;         // 8, or 16 when is_neo.
    u32 m_padded_width = 0;      // Same as the width passed to the constructor.
    u32 m_padded_height = 0;     // Only set for heights 1080 and 720; 0 otherwise.
    u32 m_pipe_bits = 0;         // 3, or 4 when is_neo (log2 of m_num_pipes).
    u32 m_bank_bits = 0;         // 4, or 3 when is_neo (log2 of m_num_banks).

    // Selects the tiling parameters for a width x height surface.
    // Heights other than 1080 and 720 leave m_padded_height at 0.
    TileManager32(u32 width, u32 height, bool is_neo) {
        m_macro_tile_height = (is_neo ? 128 : 64);
        m_bank_height = is_neo ? 2 : 1;
        m_num_banks = is_neo ? 8 : 16;
        m_num_pipes = is_neo ? 16 : 8;
        m_padded_width = width;
        if (height == 1080) {
            m_padded_height = is_neo ? 1152 : 1088;
        }
        if (height == 720) {
            m_padded_height = 768;
        }
        m_pipe_bits = is_neo ? 4 : 3;
        m_bank_bits = is_neo ? 3 : 4;
    }

    // Returns the 6-bit index of element (x, y) inside its 8x8 tile.
    // The low three bits of x and y are interleaved as x0 x1 y0 x2 y1 y2
    // (least significant first).
    static u32 GetElementIndex(u32 x, u32 y) {
        u32 elem = 0;
        elem |= ((x >> 0u) & 0x1u) << 0u;
        elem |= ((x >> 1u) & 0x1u) << 1u;
        elem |= ((y >> 0u) & 0x1u) << 2u;
        elem |= ((x >> 2u) & 0x1u) << 3u;
        elem |= ((y >> 1u) & 0x1u) << 4u;
        elem |= ((y >> 2u) & 0x1u) << 5u;
        return elem;
    }

    // Returns the pipe index for element (x, y): three bits normally, four
    // bits when is_neo. The low three bits are computed identically in both modes.
    static u32 GetPipeIndex(u32 x, u32 y, bool is_neo) {
        u32 pipe = 0;
        pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
        pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
        pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
        if (is_neo) {
            pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
        }
        return pipe;
    }

    // Returns the bank index for element (x, y).
    //
    // x is first divided by (bank_width * num_pipes) and y by bank_height; both
    // divisors are powers of two, so the division is done as a right shift by
    // log2 of the divisor. Only num_banks of 8 or 16 is handled; any other
    // value yields bank 0.
    static u32 GetBankIndex(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) {
        // std::bit_width(n) is floor(log2(n)) + 1, so subtract one to obtain
        // the shift amount. OR-ing with 1 keeps the result at 0 for n == 0
        // instead of underflowing.
        const u32 x_shift_offset = static_cast<u32>(std::bit_width((bank_width * num_pipes) | 1u) - 1);
        const u32 y_shift_offset = static_cast<u32>(std::bit_width(bank_height | 1u) - 1);
        const u32 xs = x >> x_shift_offset;
        const u32 ys = y >> y_shift_offset;
        u32 bank = 0;
        switch (num_banks) {
        case 8:
            bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
            bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
            bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
            break;
        case 16:
            bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
            bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
            bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
            bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
            break;
        default:;
        }
        return bank;
    }

    // Returns the byte offset of element (x, y) inside the tiled buffer.
    //
    // The offset is assembled from: the element index within its 8x8 tile, the
    // macro tile the element falls into, the tile row within the bank, and the
    // pipe/bank indices, which are inserted above the low 8 bits of the
    // address. Intermediate offsets are tracked in bits and converted back to
    // bytes at the end.
    u64 GetTiledOffset(u32 x, u32 y, bool is_neo) const {
        u64 element_index = GetElementIndex(x, y);
        u32 xh = x;
        u32 yh = y;
        u64 pipe = GetPipeIndex(xh, yh, is_neo);
        u64 bank = GetBankIndex(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes);

        // One 8x8 tile of 32-bit elements: 256 bytes.
        u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
        u64 element_offset = (element_index * 32); // In bits.

        // Tiles larger than 512 bytes are split into slices; with 256-byte
        // tiles this branch is not taken and tile_split_slice stays 0.
        u64 tile_split_slice = 0;
        if (tile_bytes > 512) {
            tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
            element_offset %= (static_cast<u64>(512) * 8);
            tile_bytes = 512;
        }

        // Locate the macro tile (128 elements wide, m_macro_tile_height tall).
        u64 macro_tile_bytes = (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
        u64 macro_tiles_per_row = m_padded_width / 128;
        u64 macro_tile_row_index = y / m_macro_tile_height;
        u64 macro_tile_column_index = x / 128;
        u64 macro_tile_index = (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
        u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;

        u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
        u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
        u64 slice_offset = tile_split_slice * slice_bytes;

        // Tile row inside the bank.
        u64 tile_row_index = (y / 8) % m_bank_height;
        u64 tile_index = tile_row_index;
        u64 tile_offset = tile_index * tile_bytes;

        // Rotate the bank by the split slice and wrap to the number of banks.
        u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
        bank ^= tile_split_slice_rotation;
        bank &= (m_num_banks - 1);

        // Combine everything in bits, then split into bit and byte parts.
        u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
        u64 bit_offset = total_offset & 0x7u;
        total_offset /= 8;

        // Low 8 bits stay in place; pipe and bank bits are inserted above them
        // and the remaining offset bits go on top.
        u64 pipe_interleave_offset = total_offset & 0xffu;
        u64 offset = total_offset >> 8u;
        u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | (offset << (8u + m_pipe_bits + m_bank_bits));
        return ((byte_offset << 3u) | bit_offset) / 8;
    }
};
void ConvertTileToLinear(u08* dst, const u08* src,u32 width, u32 height, bool is_neo) {
const TileManager32 t{width, height, is_neo};
for (u32 y = 0; y < height; y++) {
u32 x = 0;
u64 linear_offset = y * width * 4;
for (; x + 1 < width; x += 2) {
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64));
linear_offset += sizeof(u64);
}
if (x < width) {
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32));
}
}
}
} // namespace VideoCore

View File

@ -0,0 +1,9 @@
#pragma once
#include "common/types.h"
namespace VideoCore {
// Converts the tiled surface at `src` into a linear surface at `dst`.
// `width` and `height` give the surface size; `neo` selects the tiling variant.
// NOTE(review): presumably both buffers hold 32-bit elements and `dst` must
// provide width * height * 4 bytes - confirm against the definition.
void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool neo);
} // namespace VideoCore

View File

@ -30,7 +30,6 @@ add_subdirectory(toml11 EXCLUDE_FROM_ALL)
# Vulkan # Vulkan
add_subdirectory(vulkan EXCLUDE_FROM_ALL) add_subdirectory(vulkan EXCLUDE_FROM_ALL)
target_include_directories(vulkan-1 INTERFACE vulkan/include)
# Winpthreads # Winpthreads
if (WIN32) if (WIN32)
@ -66,6 +65,6 @@ target_include_directories(imgui PUBLIC
imgui/include imgui/include
) )
target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis winpthread discord-rpc) target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis discord-rpc)

2
third-party/fmt vendored

@ -1 +1 @@
Subproject commit 661b23edeb52d400cf5812e7330f14f05c072fab Subproject commit 8e42eef4950feb5d2b76574a9cd2591dfaae2449

2
third-party/spdlog vendored

@ -1 +1 @@
Subproject commit 76dfc7e7c0d3c69d3cdaa3399b63545235ccbb02 Subproject commit 134f9194bb93072b72b8cfa27ac3bb30a0fb5b57

2
third-party/vulkan vendored

@ -1 +1 @@
Subproject commit 8c9feb4f480b32f7c7421af546aa6ffb558bdd5e Subproject commit 72b2e740754bc6b86b724fa5b2c90dca6f69462e