From fe4355877977974bf5c629390f7d73b111073872 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 15 Feb 2024 00:52:57 +0200 Subject: [PATCH] common: Rework timekeeping with native RDTSC and port to linux --- .gitmodules | 7 +- CMakeLists.txt | 17 +- src/common/log.cpp | 4 + src/common/native_clock.cpp | 43 ++++ src/common/native_clock.h | 32 +++ src/common/rdtsc.cpp | 60 ++++++ src/common/rdtsc.h | 37 ++++ src/common/timer.cpp | 108 ---------- src/common/timer.h | 43 ---- src/common/types.h | 10 +- src/common/uint128.h | 115 +++++++++++ src/core/PS4/GPU/gpu_memory.cpp | 2 +- src/core/PS4/HLE/Graphics/graphics_ctx.h | 3 +- src/core/file_sys/fs.h | 4 +- src/core/hle/kernel/Objects/event_queue.cpp | 32 +-- src/core/hle/kernel/Objects/event_queue.h | 1 + .../hle/kernel/Objects/physical_memory.cpp | 2 +- src/core/hle/kernel/event_queues.h | 2 +- src/core/hle/libraries/libc/libc_math.cpp | 16 +- src/core/hle/libraries/libc/libc_stdio.cpp | 2 +- .../hle/libraries/libkernel/libkernel.cpp | 11 + .../libraries/libkernel/time_management.cpp | 16 +- src/core/linker.cpp | 4 +- src/core/loader/elf.cpp | 2 +- src/core/virtual_memory.cpp | 12 +- src/emuTimer.cpp | 23 --- src/emuTimer.h | 10 - src/emulator.cpp | 34 +--- src/main.cpp | 2 - src/video_core/gpu_memory.cpp | 188 ++++++++++++++++++ src/video_core/gpu_memory.h | 86 ++++++++ src/video_core/tile_manager.cpp | 151 ++++++++++++++ src/video_core/tile_manager.h | 9 + third-party/CMakeLists.txt | 3 +- third-party/fmt | 2 +- third-party/spdlog | 2 +- third-party/vulkan | 2 +- 37 files changed, 818 insertions(+), 279 deletions(-) create mode 100644 src/common/native_clock.cpp create mode 100644 src/common/native_clock.h create mode 100644 src/common/rdtsc.cpp create mode 100644 src/common/rdtsc.h delete mode 100644 src/common/timer.cpp delete mode 100644 src/common/timer.h create mode 100644 src/common/uint128.h delete mode 100644 src/emuTimer.cpp delete mode 100644 src/emuTimer.h create mode 100644 src/video_core/gpu_memory.cpp create mode 100644 src/video_core/gpu_memory.h create mode 100644 src/video_core/tile_manager.cpp create mode 100644 src/video_core/tile_manager.h diff --git a/.gitmodules b/.gitmodules index 9cfbedd0..8a240d58 100644 --- a/.gitmodules +++ b/.gitmodules @@ -33,11 +33,10 @@ path = third-party/toml11 url = https://github.com/ToruNiina/toml11 branch = master -[submodule "third-party/vulkan"] - path = third-party/vulkan - url = https://github.com/shadps4/vulkan.git - branch = main [submodule "third-party/xxHash"] path = third-party/xxHash url = https://github.com/Cyan4973/xxHash.git branch = dev +[submodule "third-party/vulkan"] + path = third-party/vulkan + url = https://github.com/GPUCode/vulkan diff --git a/CMakeLists.txt b/CMakeLists.txt index 7db7e5b6..c3b2dca3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,8 +30,8 @@ endfunction() add_subdirectory(third-party) include_directories(src) -set(LIBC_SOURCES src/core/hle/libraries/libc/Libc.cpp - src/core/hle/libraries/libc/Libc.h +set(LIBC_SOURCES src/core/hle/libraries/libc/libc.cpp + src/core/hle/libraries/libc/libc.h src/core/hle/libraries/libc/printf.h src/core/hle/libraries/libc/va_ctx.h src/core/hle/libraries/libc/libc_cxa.cpp @@ -77,12 +77,15 @@ add_executable(shadps4 src/common/fs_file.h src/common/log.cpp src/common/log.h + src/common/native_clock.cpp + src/common/native_clock.h + src/common/rdtsc.cpp + src/common/rdtsc.h src/common/singleton.h src/common/string_util.cpp src/common/string_util.h - src/common/timer.cpp - src/common/timer.h src/common/types.h + src/common/uint128.h src/common/version.h ${LIBC_SOURCES} ${USERSERVICE_SOURCES} @@ -143,8 +146,6 @@ add_executable(shadps4 src/core/PS4/HLE/Graphics/graphics_render.h src/core/PS4/GPU/tile_manager.cpp src/core/PS4/GPU/tile_manager.h - src/emuTimer.cpp - src/emuTimer.h src/core/hle/libraries/libkernel/time_management.cpp src/core/hle/libraries/libkernel/time_management.h "src/common/io_file.cpp" "src/common/io_file.h") @@ -161,6 +162,8 @@ add_custom_command(TARGET shadps4 POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ $) -add_custom_command(TARGET shadps4 POST_BUILD +if (WIN32) + add_custom_command(TARGET shadps4 POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${PROJECT_SOURCE_DIR}/third-party/winpthread/bin/libwinpthread-1.dll" $) +endif() diff --git a/src/common/log.cpp b/src/common/log.cpp index 535d4bd0..f4bb55f8 100644 --- a/src/common/log.cpp +++ b/src/common/log.cpp @@ -109,7 +109,11 @@ int Init(bool use_stdout) { if (use_stdout) { sinks.push_back(std::make_shared()); } +#ifdef _WIN64 sinks.push_back(std::make_shared(L"shadps4.txt", true)); +#else + sinks.push_back(std::make_shared("shadps4.txt", true)); +#endif spdlog::set_default_logger(std::make_shared("shadps4 logger", begin(sinks), end(sinks))); auto f = std::make_unique("%^|%L|: %v%$", spdlog::pattern_time_type::local, std::string("")); // disable eol spdlog::set_formatter(std::move(f)); diff --git a/src/common/native_clock.cpp b/src/common/native_clock.cpp new file mode 100644 index 00000000..e722de0d --- /dev/null +++ b/src/common/native_clock.cpp @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/uint128.h" +#include "common/native_clock.h" +#include "common/rdtsc.h" +#ifdef _WIN64 +#include +#else +#include +#endif + +namespace Common { + +NativeClock::NativeClock() + : rdtsc_frequency{EstimateRDTSCFrequency()}, ns_rdtsc_factor{GetFixedPoint64Factor(std::nano::den, + rdtsc_frequency)}, + us_rdtsc_factor{GetFixedPoint64Factor(std::micro::den, rdtsc_frequency)}, + ms_rdtsc_factor{GetFixedPoint64Factor(std::milli::den, rdtsc_frequency)} {} + +u64 NativeClock::GetTimeNS() const { + return MultiplyHigh(GetUptime(), ns_rdtsc_factor); +} + +u64 NativeClock::GetTimeUS() const { + return MultiplyHigh(GetUptime(), us_rdtsc_factor); +} + +u64 NativeClock::GetTimeMS() const { + return MultiplyHigh(GetUptime(), ms_rdtsc_factor); +} + +u64 NativeClock::GetUptime() const { + return FencedRDTSC(); +} + +u64 NativeClock::GetProcessTimeUS() const { + timespec ret; + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ret); + return ret.tv_nsec / 1000 + ret.tv_sec * 1000000; +} + +} // namespace Common::X64 diff --git a/src/common/native_clock.h b/src/common/native_clock.h new file mode 100644 index 00000000..227c8d1a --- /dev/null +++ b/src/common/native_clock.h @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Common { + +class NativeClock final { +public: + explicit NativeClock(); + + u64 GetTscFrequency() const { + return rdtsc_frequency; + } + + u64 GetTimeNS() const; + u64 GetTimeUS() const; + u64 GetTimeMS() const; + u64 GetUptime() const; + u64 GetProcessTimeUS() const; + +private: + u64 rdtsc_frequency; + u64 ns_rdtsc_factor; + u64 us_rdtsc_factor; + u64 ms_rdtsc_factor; +}; + +} // namespace Common diff --git a/src/common/rdtsc.cpp b/src/common/rdtsc.cpp new file mode 100644 index 00000000..8d231a98 --- /dev/null +++ b/src/common/rdtsc.cpp @@ -0,0 +1,60 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "common/rdtsc.h" +#include "common/uint128.h" + +#ifdef _WIN64 +#include +#endif + +namespace Common { + +static constexpr size_t SecondToNanoseconds = 1000000000ULL; + +template +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + +static u64 GetTimeNs() { +#ifdef _WIN64 + // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. + static constexpr u64 Multiplier = 100; + // Convert Windows epoch to Unix epoch. + static constexpr u64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; + FILETIME filetime; + GetSystemTimePreciseAsFileTime(&filetime); + return Multiplier * ((static_cast(filetime.dwHighDateTime) << 32) + + static_cast(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); +#elif defined(__APPLE__) + return clock_gettime_nsec_np(CLOCK_REALTIME); +#else + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return ts.tv_sec * SecondToNanoseconds + ts.tv_nsec; +#endif +} + +u64 EstimateRDTSCFrequency() { + // Discard the first result measuring the rdtsc. + FencedRDTSC(); + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + FencedRDTSC(); + + // Get the current time. + const auto start_time = GetTimeNs(); + const u64 tsc_start = FencedRDTSC(); + // Wait for 100 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{100}); + const auto end_time = GetTimeNs(); + const u64 tsc_end = FencedRDTSC(); + // Calculate differences. + const u64 tsc_diff = tsc_end - tsc_start; + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, end_time - start_time); + return RoundToNearest<100'000>(tsc_freq); +} + +} // namespace Common diff --git a/src/common/rdtsc.h b/src/common/rdtsc.h new file mode 100644 index 00000000..3180273e --- /dev/null +++ b/src/common/rdtsc.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifdef _MSC_VER +#include +#endif + +#include "common/types.h" + +namespace Common { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} +#else +static inline u64 FencedRDTSC() { + u64 eax; + u64 edx; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; +} +#endif + +u64 EstimateRDTSCFrequency(); + +} // namespace Common diff --git a/src/common/timer.cpp b/src/common/timer.cpp deleted file mode 100644 index fa3d172b..00000000 --- a/src/common/timer.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include "common/timer.h" - -#ifdef _WIN64 -#include -#endif - -namespace Common { - -Timer::Timer() { -#ifdef _WIN64 - LARGE_INTEGER f; - QueryPerformanceFrequency(&f); - m_Frequency = f.QuadPart; -#else -#error Unimplemented Timer constructor -#endif -} - -void Timer::Start() { -#ifdef _WIN64 - LARGE_INTEGER c; - QueryPerformanceCounter(&c); - m_StartTime = c.QuadPart; -#else -#error Unimplemented Timer::Start() -#endif - m_is_timer_paused = false; -} - -void Timer::Pause() { -#ifdef _WIN64 - LARGE_INTEGER c; - QueryPerformanceCounter(&c); - m_PauseTime = c.QuadPart; -#else -#error Unimplemented Timer::Pause() -#endif - m_is_timer_paused = true; -} - -void Timer::Resume() { - u64 current_time = 0; -#ifdef _WIN64 - LARGE_INTEGER c; - QueryPerformanceCounter(&c); - current_time = c.QuadPart; -#else -#error Unimplemented Timer::Resume() -#endif - m_StartTime += current_time - m_PauseTime; - m_is_timer_paused = false; -} - -double Timer::GetTimeMsec() const { - if (m_is_timer_paused) { - return 1000.0 * (static_cast(m_PauseTime - m_StartTime)) / static_cast(m_Frequency); - } - - u64 current_time = 0; -#ifdef _WIN64 - LARGE_INTEGER c; - QueryPerformanceCounter(&c); - current_time = c.QuadPart; -#else -#error Unimplemented Timer::GetTimeMsec() -#endif - return 1000.0 * (static_cast(current_time - m_StartTime)) / static_cast(m_Frequency); -} - -double Timer::GetTimeSec() const { - if (m_is_timer_paused) { - return (static_cast(m_PauseTime - m_StartTime)) / static_cast(m_Frequency); - } - - u64 current_time = 0; -#ifdef _WIN64 - LARGE_INTEGER c; - QueryPerformanceCounter(&c); - current_time = c.QuadPart; -#else -#error Unimplemented Timer::GetTimeSec() -#endif - return (static_cast(current_time - m_StartTime)) / static_cast(m_Frequency); -} - -u64 Timer::GetTicks() const { - if (m_is_timer_paused) { - return (m_PauseTime - m_StartTime); - } - - u64 current_time = 0; -#ifdef _WIN64 - LARGE_INTEGER c; - QueryPerformanceCounter(&c); - current_time = c.QuadPart; -#else -#error Unimplemented Timer::GetTicks() -#endif - return (current_time - m_StartTime); -} - -u64 Timer::getQueryPerformanceCounter() { - LARGE_INTEGER c; - QueryPerformanceCounter(&c); - return c.QuadPart; -} - -} // namespace Common diff --git a/src/common/timer.h b/src/common/timer.h deleted file mode 100644 index e390e7e6..00000000 --- a/src/common/timer.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include "common/types.h" - -namespace Common { - -class Timer final { -public: - Timer(); - ~Timer() = default; - - void Start(); - void Pause(); - void Resume(); - - bool IsPaused() const { - return m_is_timer_paused; - } - - u64 GetFrequency() const { - return m_Frequency; - } - - double GetTimeMsec() const; - double GetTimeSec() const; - u64 GetTicks() const; - - [[nodiscard]] static u64 getQueryPerformanceCounter(); - -public: - Timer(const Timer&) = delete; - Timer& operator=(const Timer&) = delete; - Timer(Timer&&) = delete; - Timer& operator=(Timer&&) = delete; - -private: - bool m_is_timer_paused = true; - u64 m_Frequency{}; - u64 m_StartTime{}; - u64 m_PauseTime{}; -}; - -} // namespace Common diff --git a/src/common/types.h b/src/common/types.h index 9ddfede9..4d86ec25 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -1,5 +1,6 @@ #pragma once +#include #include using s08 = std::int8_t; @@ -15,9 +16,12 @@ using u64 = std::uint64_t; using f32 = float; using f64 = double; +using u128 = std::array; +static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide"); + #define PS4_SYSV_ABI __attribute__((sysv_abi)) // UDLs for memory size values -constexpr u64 operator""_KB(u64 x) { return 1024ULL * x; } -constexpr u64 operator""_MB(u64 x) { return 1024_KB * x; } -constexpr u64 operator""_GB(u64 x) { return 1024_MB * x; } +constexpr unsigned long long operator""_KB(unsigned long long x) { return 1024ULL * x; } +constexpr unsigned long long operator""_MB(unsigned long long x) { return 1024_KB * x; } +constexpr unsigned long long operator""_GB(unsigned long long x) { return 1024_MB * x; } diff --git a/src/common/uint128.h b/src/common/uint128.h new file mode 100644 index 00000000..c4435791 --- /dev/null +++ b/src/common/uint128.h @@ -0,0 +1,115 @@ +// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#ifdef _MSC_VER +#ifndef __clang__ +#define HAS_INTRINSICS +#include +#pragma intrinsic(__umulh) +#pragma intrinsic(_umul128) +#pragma intrinsic(_udiv128) +#else +#endif +#else +#include +#endif + +#include "common/types.h" + +namespace Common { + +// This function multiplies 2 u64 values and divides it by a u64 value. +[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { +#ifdef HAS_INTRINSICS + u128 r{}; + r[0] = _umul128(a, b, &r[1]); + u64 remainder; + return _udiv128(r[1], r[0], d, &remainder); +#else + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +#endif +} + +// This function multiplies 2 u64 values and produces a u128 value; +[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) { + u128 result; +#ifdef HAS_INTRINSICS + result[0] = _umul128(a, b, &result[1]); +#else + unsigned __int128 tmp = a; + tmp *= b; + std::memcpy(&result, &tmp, sizeof(u128)); +#endif + return result; +} + +[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { +#ifdef __SIZEOF_INT128__ + const auto base = static_cast(numerator) << 64ULL; + return static_cast(base / divisor); +#elif defined(_M_X64) || defined(_M_ARM64) + std::array r = {0, numerator}; + u64 remainder; + return _udiv128(r[1], r[0], divisor, &remainder); +#else + // This one is bit more inaccurate. + return MultiplyAndDivide64(std::numeric_limits::max(), numerator, divisor); +#endif +} + +[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) { +#ifdef __SIZEOF_INT128__ + return (static_cast(a) * static_cast(b)) >> 64; +#elif defined(_M_X64) || defined(_M_ARM64) + return __umulh(a, b); // MSVC +#else + // Generic fallback + const u64 a_lo = u32(a); + const u64 a_hi = a >> 32; + const u64 b_lo = u32(b); + const u64 b_hi = b >> 32; + + const u64 a_x_b_hi = a_hi * b_hi; + const u64 a_x_b_mid = a_hi * b_lo; + const u64 b_x_a_mid = b_hi * a_lo; + const u64 a_x_b_lo = a_lo * b_lo; + + const u64 carry_bit = (static_cast(static_cast(a_x_b_mid)) + + static_cast(static_cast(b_x_a_mid)) + (a_x_b_lo >> 32)) >> + 32; + + const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return multhi; +#endif +} + +// This function divides a u128 by a u32 value and produces two u64 values: +// the result of division and the remainder +[[nodiscard]] static inline std::pair Divide128On32(u128 dividend, u32 divisor) { + u64 remainder = dividend[0] % divisor; + u64 accum = dividend[0] / divisor; + if (dividend[1] == 0) + return {accum, remainder}; + // We ignore dividend[1] / divisor as that overflows + const u64 first_segment = (dividend[1] % divisor) << 32; + accum += (first_segment / divisor) << 32; + const u64 second_segment = (first_segment % divisor) << 32; + accum += (second_segment / divisor); + remainder += second_segment % divisor; + if (remainder >= divisor) { + accum++; + remainder -= divisor; + } + return {accum, remainder}; +} + +} // namespace Common diff --git a/src/core/PS4/GPU/gpu_memory.cpp b/src/core/PS4/GPU/gpu_memory.cpp index c278f37e..8f3086bc 100644 --- a/src/core/PS4/GPU/gpu_memory.cpp +++ b/src/core/PS4/GPU/gpu_memory.cpp @@ -1,5 +1,5 @@ #include "gpu_memory.h" - +#include #include #include "common/singleton.h" diff --git a/src/core/PS4/HLE/Graphics/graphics_ctx.h b/src/core/PS4/HLE/Graphics/graphics_ctx.h index eb355056..cd2b6a3c 100644 --- a/src/core/PS4/HLE/Graphics/graphics_ctx.h +++ b/src/core/PS4/HLE/Graphics/graphics_ctx.h @@ -2,8 +2,9 @@ #include "common/types.h" #include -#include +#include #include +#include namespace HLE::Libs::Graphics { diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index 45bf718f..8bfdbff9 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -1,4 +1,6 @@ #pragma once + +#include #include #include #include @@ -52,4 +54,4 @@ class HandleTable { std::mutex m_mutex; }; -} // namespace Core::FileSys \ No newline at end of file +} // namespace Core::FileSys diff --git a/src/core/hle/kernel/Objects/event_queue.cpp b/src/core/hle/kernel/Objects/event_queue.cpp index 1ca2d6f1..8720daf6 100644 --- a/src/core/hle/kernel/Objects/event_queue.cpp +++ b/src/core/hle/kernel/Objects/event_queue.cpp @@ -1,6 +1,5 @@ #include "common/debug.h" -#include "common/timer.h" -#include "core/hle/kernel/objects/event_queue.h" +#include "core/hle/kernel/Objects/event_queue.h" namespace Core::Kernel { @@ -24,28 +23,19 @@ int EqueueInternal::addEvent(const EqueueEvent& event) { int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) { std::unique_lock lock{m_mutex}; + int ret = 0; - u32 timeElapsed = 0; - Common::Timer t; - t.Start(); + const auto predicate = [&] { + ret = getTriggeredEvents(ev, num); + return ret > 0; + }; - for (;;) { - int ret = getTriggeredEvents(ev, num); - - if (ret > 0 || (timeElapsed >= micros && micros != 0)) { - return ret; - } - - if (micros == 0) { - m_cond.wait(lock); - } else { - m_cond.wait_for(lock, std::chrono::microseconds(micros - timeElapsed)); - } - - timeElapsed = static_cast(t.GetTimeSec() * 1000000.0); + if (micros == 0) { + m_cond.wait(lock, predicate); + } else { + m_cond.wait_for(lock, std::chrono::microseconds(micros), predicate); } - - return 0; + return ret; } bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) { diff --git a/src/core/hle/kernel/Objects/event_queue.h b/src/core/hle/kernel/Objects/event_queue.h index 3217bed2..407ccee9 100644 --- a/src/core/hle/kernel/Objects/event_queue.h +++ b/src/core/hle/kernel/Objects/event_queue.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "common/types.h" namespace Core::Kernel { diff --git a/src/core/hle/kernel/Objects/physical_memory.cpp b/src/core/hle/kernel/Objects/physical_memory.cpp index a4bca4c8..58900f81 100644 --- a/src/core/hle/kernel/Objects/physical_memory.cpp +++ b/src/core/hle/kernel/Objects/physical_memory.cpp @@ -1,4 +1,4 @@ -#include "core/hle/kernel/objects/physical_memory.h" +#include "core/hle/kernel/Objects/physical_memory.h" namespace Core::Kernel { diff --git a/src/core/hle/kernel/event_queues.h b/src/core/hle/kernel/event_queues.h index e26be3bc..2f39f83a 100644 --- a/src/core/hle/kernel/event_queues.h +++ b/src/core/hle/kernel/event_queues.h @@ -1,6 +1,6 @@ #pragma once -#include "core/hle/kernel/objects/event_queue.h" +#include "core/hle/kernel/Objects/event_queue.h" namespace Core::Kernel { diff --git a/src/core/hle/libraries/libc/libc_math.cpp b/src/core/hle/libraries/libc/libc_math.cpp index 541dfb1a..4052b7c4 100644 --- a/src/core/hle/libraries/libc/libc_math.cpp +++ b/src/core/hle/libraries/libc/libc_math.cpp @@ -4,35 +4,35 @@ namespace Core::Libraries::LibC { float PS4_SYSV_ABI ps4_atan2f(float y, float x) { - return std::atan2f(y, x); + return atan2f(y, x); } float PS4_SYSV_ABI ps4_acosf(float num) { - return std::acosf(num); + return acosf(num); } float PS4_SYSV_ABI ps4_tanf(float num) { - return std::tanf(num); + return tanf(num); } float PS4_SYSV_ABI ps4_asinf(float num) { - return std::asinf(num); + return asinf(num); } double PS4_SYSV_ABI ps4_pow(double base, double exponent) { - return std::pow(base, exponent); + return pow(base, exponent); } double PS4_SYSV_ABI ps4__Sin(double x) { - return std::sin(x); + return sin(x); } float PS4_SYSV_ABI ps4__Fsin(float arg) { - return std::sinf(arg); + return sinf(arg); } double PS4_SYSV_ABI ps4_exp2(double arg) { - return std::exp2(arg); + return exp2(arg); } } // namespace Core::Libraries::LibC diff --git a/src/core/hle/libraries/libc/libc_stdio.cpp b/src/core/hle/libraries/libc/libc_stdio.cpp index b3b5dac2..aae85483 100644 --- a/src/core/hle/libraries/libc/libc_stdio.cpp +++ b/src/core/hle/libraries/libc/libc_stdio.cpp @@ -12,7 +12,7 @@ int PS4_SYSV_ABI ps4_printf(VA_ARGS) { } int PS4_SYSV_ABI ps4_fprintf(FILE* file, VA_ARGS) { - int fd = _fileno(file); + int fd = fileno(file); if (fd == 1 || fd == 2) { // output stdout and stderr to console VA_CTX(ctx); return printf_ctx(&ctx); diff --git a/src/core/hle/libraries/libkernel/libkernel.cpp b/src/core/hle/libraries/libkernel/libkernel.cpp index e0cb27bd..9eadbcb8 100644 --- a/src/core/hle/libraries/libkernel/libkernel.cpp +++ b/src/core/hle/libraries/libkernel/libkernel.cpp @@ -15,6 +15,8 @@ #ifdef _WIN64 #include #include +#else +#include #endif #include "thread_management.h" @@ -56,6 +58,7 @@ int* PS4_SYSV_ABI __Error() { return &libc_error; } #define PROT_WRITE 0x2 int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, off_t offset, void** res) { +#ifdef _WIN64 PRINT_FUNCTION_NAME(); if (prot > 3) // READ,WRITE or bitwise READ | WRITE supported { @@ -86,6 +89,14 @@ int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, } *res = ret; return 0; +#else + void* result = mmap(addr, len, prot, flags, fd, offset); + if (result != MAP_FAILED) { + *res = result; + return 0; + } + std::abort(); +#endif } PS4_SYSV_ABI void* posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) { diff --git a/src/core/hle/libraries/libkernel/time_management.cpp b/src/core/hle/libraries/libkernel/time_management.cpp index a1b40ce8..00861a1b 100644 --- a/src/core/hle/libraries/libkernel/time_management.cpp +++ b/src/core/hle/libraries/libkernel/time_management.cpp @@ -1,27 +1,31 @@ -#include "common/timer.h" +#include "common/native_clock.h" #include "core/hle/libraries/libkernel/time_management.h" #include "core/hle/libraries/libs.h" -#include "emuTimer.h" namespace Core::Libraries::LibKernel { +static u64 initial_ptc; +static std::unique_ptr clock; + u64 PS4_SYSV_ABI sceKernelGetProcessTime() { - return static_cast(Emulator::emuTimer::getTimeMsec() * 1000.0); // return time in microseconds + return clock->GetProcessTimeUS(); } u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter() { - return Emulator::emuTimer::getTimeCounter(); + return clock->GetUptime() - initial_ptc; } u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency() { - return Emulator::emuTimer::getTimeFrequency(); + return clock->GetTscFrequency(); } u64 PS4_SYSV_ABI sceKernelReadTsc() { - return Common::Timer::getQueryPerformanceCounter(); + return clock->GetUptime(); } void timeSymbolsRegister(Loader::SymbolsResolver* sym) { + clock = std::make_unique(); + initial_ptc = clock->GetUptime(); LIB_FUNCTION("4J2sUJmuHZQ", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTime); LIB_FUNCTION("fgxnMeTNUtY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounter); LIB_FUNCTION("BNowx2l588E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounterFrequency); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index cc35c884..611d6b1b 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -658,12 +658,12 @@ void Linker::Resolve(const std::string& name, int Symtype, Module* m, Loader::Sy } else { - __debugbreak();//den tha prepei na ftasoume edo + //__debugbreak();//den tha prepei na ftasoume edo } } else { - __debugbreak();//oute edo mallon + //__debugbreak();//oute edo mallon } } diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 729d6b93..b23f6c6f 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -5,7 +5,7 @@ namespace Core::Loader { -constexpr bool log_file_loader = true; // disable it to disable logging +constexpr bool log_file_loader = false; // disable it to disable logging static std::string_view getProgramTypeName(program_type_es type) { switch (type) { diff --git a/src/core/virtual_memory.cpp b/src/core/virtual_memory.cpp index f730aff4..cdff12c2 100644 --- a/src/core/virtual_memory.cpp +++ b/src/core/virtual_memory.cpp @@ -79,7 +79,8 @@ bool memory_protect(u64 address, u64 size, MemoryMode mode, MemoryMode* old_mode } return true; #else -#error Unimplement memory_protect function + int ret = mprotect(reinterpret_cast(address), size, convertMemoryMode(mode)); + return true; #endif } @@ -117,6 +118,7 @@ bool memory_patch(u64 vaddr, u64 value) { static u64 AlignUp(u64 pos, u64 align) { return (align != 0 ? (pos + (align - 1)) & ~(align - 1) : pos); } u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment) { +#ifdef _WIN64 // try allocate aligned address inside user area MEM_ADDRESS_REQUIREMENTS req{}; MEM_EXTENDED_PARAMETER param{}; @@ -134,5 +136,13 @@ u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment) LOG_ERROR_IF(true, "VirtualAlloc2() failed: 0x{:X}\n", err); } return ptr; +#else + void* hint_address = reinterpret_cast(AlignUp(address, alignment)); + void* ptr = mmap(hint_address, size, convertMemoryMode(mode), MAP_ANON | MAP_PRIVATE, -1, 0); + if (ptr == MAP_FAILED) { + std::abort(); + } + return reinterpret_cast(ptr); +#endif } } // namespace VirtualMemory diff --git a/src/emuTimer.cpp b/src/emuTimer.cpp deleted file mode 100644 index b05c001b..00000000 --- a/src/emuTimer.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "common/timer.h" - -namespace Emulator::emuTimer { - -static Common::Timer timer; - -void start() { - timer.Start(); -} - -double getTimeMsec() { - return timer.GetTimeMsec(); -} - -u64 getTimeCounter() { - return timer.GetTicks(); -} - -u64 getTimeFrequency() { - return timer.GetFrequency(); -} - -} // namespace Emulator::emuTimer diff --git a/src/emuTimer.h b/src/emuTimer.h deleted file mode 100644 index 26c20743..00000000 --- a/src/emuTimer.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include "common/types.h" - -namespace Emulator::emuTimer { -void start(); -double getTimeMsec(); -u64 getTimeCounter(); -u64 getTimeFrequency(); -} // namespace Emulator::emuTimer diff --git a/src/emulator.cpp b/src/emulator.cpp index 51f13be6..e52cd054 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,6 +1,5 @@ #include #include -#include "common/timer.h" #include "common/singleton.h" #include "common/version.h" #include "emulator.h" @@ -90,8 +89,6 @@ static void calculateFps(double game_time_s) { } } void emuRun() { - Common::Timer timer; - timer.Start(); auto window_ctx = Common::Singleton::Instance(); { // init window and wait until init finishes @@ -100,7 +97,7 @@ void emuRun() { Graphics::Vulkan::vulkanCreate(window_ctx); window_ctx->m_is_graphic_initialized = true; window_ctx->m_graphic_initialized_cond.notify_one(); - calculateFps(timer.GetTimeSec()); + calculateFps(0); // TODO: Proper fps } bool exit_loop = false; @@ -138,10 +135,6 @@ void emuRun() { continue; } if (m_game_is_paused) { - if (!timer.IsPaused()) { - timer.Pause(); - } - SDL_WaitEvent(&event); switch (event.type) { @@ -171,21 +164,13 @@ void emuRun() { continue; } exit_loop = m_emu_needs_exit; - if (m_game_is_paused) { - if (!timer.IsPaused()) { - timer.Pause(); - } - } else { - if (timer.IsPaused()) { - timer.Resume(); - } - + if (!m_game_is_paused) { if (!exit_loop) { update(); } if (!exit_loop) { if (HLE::Libs::Graphics::VideoOut::videoOutFlip(100000)) { // flip every 0.1 sec - calculateFps(timer.GetTimeSec()); + calculateFps(0); // TODO: Proper fps } } } @@ -215,7 +200,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) { window_ctx->swapchain.current_index = static_cast(-1); auto result = vkAcquireNextImageKHR(window_ctx->m_graphic_ctx.m_device, window_ctx->swapchain.swapchain, UINT64_MAX, nullptr, - window_ctx->swapchain.present_complete_fence, &window_ctx->swapchain.current_index); + VK_NULL_HANDLE, &window_ctx->swapchain.current_index); if (result != VK_SUCCESS) { fmt::print("Can't aquireNextImage\n"); @@ -226,16 +211,6 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) { std::exit(0); } - do { - result = vkWaitForFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence, VK_TRUE, 100000000); - } while (result == VK_TIMEOUT); - if (result != VK_SUCCESS) { - fmt::print("vkWaitForFences is not success\n"); - std::exit(0); - } - - vkResetFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence); - auto blt_src_image = image; auto blt_dst_image = window_ctx->swapchain; @@ -272,6 +247,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) { buffer.end(); buffer.executeWithSemaphore(); + buffer.waitForFence(); // HACK: The whole vulkan backend needs a rewrite VkPresentInfoKHR present{}; present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; diff --git a/src/main.cpp b/src/main.cpp index b7723ae7..84b00ee9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,7 +14,6 @@ #include "core/PS4/HLE/Graphics/video_out.h" #include "core/hle/libraries/libs.h" #include "core/linker.h" -#include "emuTimer.h" #include "emulator.h" #include #include "core/file_sys/fs.h" @@ -31,7 +30,6 @@ int main(int argc, char* argv[]) { auto height = Config::getScreenHeight(); Emu::emuInit(width, height); HLE::Libs::Graphics::VideoOut::videoOutInit(width, height); - Emulator::emuTimer::start(); // Argument 1 is the path of self file to boot const char* const path = argv[1]; diff --git a/src/video_core/gpu_memory.cpp b/src/video_core/gpu_memory.cpp new file mode 100644 index 00000000..4b5cfe6d --- /dev/null +++ b/src/video_core/gpu_memory.cpp @@ -0,0 +1,188 @@ +#include "gpu_memory.h" +#include +#include + +#include "common/singleton.h" + +void* GPU::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo /*CommandBuffer?*/, u64 virtual_addr, u64 size, + const GPUObject& info) { + auto* gpumemory = Common::Singleton::Instance(); + + return gpumemory->memoryCreateObj(submit_id, ctx, nullptr, &virtual_addr, &size, 1, info); +} + +void GPU::memorySetAllocArea(u64 virtual_addr, u64 size) { + auto* gpumemory = Common::Singleton::Instance(); + + std::scoped_lock lock{gpumemory->m_mutex}; + + MemoryHeap h; + h.allocated_virtual_addr = virtual_addr; + h.allocated_size = size; + + gpumemory->m_heaps.push_back(h); +} + +u64 GPU::calculate_hash(const u08* buf, u64 size) { return (size > 0 && buf != nullptr ? XXH3_64bits(buf, size) : 0); } + +bool GPU::vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem) { + static std::atomic_uint64_t unique_id = 0; + + VkPhysicalDeviceMemoryProperties memory_properties{}; + vkGetPhysicalDeviceMemoryProperties(ctx->m_physical_device, &memory_properties); + + u32 index = 0; + for (; index < memory_properties.memoryTypeCount; index++) { + if ((mem->requirements.memoryTypeBits & (static_cast(1) << index)) != 0 && + (memory_properties.memoryTypes[index].propertyFlags & mem->property) == mem->property) { + break; + } + } + + mem->type = index; + mem->offset = 0; + + VkMemoryAllocateInfo alloc_info{}; + alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + alloc_info.pNext = nullptr; + alloc_info.allocationSize = mem->requirements.size; + alloc_info.memoryTypeIndex = index; + + mem->unique_id = ++unique_id; + + auto result = vkAllocateMemory(ctx->m_device, &alloc_info, nullptr, &mem->memory); + + if (result == VK_SUCCESS) { + return true; + } + return false; +} + +void GPU::flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx) { + auto* gpumemory = Common::Singleton::Instance(); + gpumemory->flushAllHeaps(ctx); +} + +int GPU::GPUMemory::getHeapId(u64 virtual_addr, u64 size) { + int index = 0; + for (const auto& heap : m_heaps) { + if ((virtual_addr >= heap.allocated_virtual_addr && virtual_addr < heap.allocated_virtual_addr + heap.allocated_size) || + ((virtual_addr + size - 1) >= heap.allocated_virtual_addr && + (virtual_addr + size - 1) < heap.allocated_virtual_addr + heap.allocated_size)) { + return index; + } + index++; + } + return -1; +} + +void* GPU::GPUMemory::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo, const u64* virtual_addr, const u64* size, + int virtual_addr_num, const GPUObject& info) { + auto* gpumemory = Common::Singleton::Instance(); + + std::scoped_lock lock{gpumemory->m_mutex}; + + int heap_id = gpumemory->getHeapId(virtual_addr[0], size[0]); + + if (heap_id < 0) { + return nullptr; + } + auto& heap = m_heaps[heap_id]; + + ObjInfo objInfo = {}; + + // Copy parameters from info to obj + for (int i = 0; i < 8; i++) { + objInfo.obj_params[i] = info.obj_params[i]; + } + + objInfo.gpu_object.objectType = info.objectType; + objInfo.gpu_object.obj = nullptr; + + for (int h = 0; h < virtual_addr_num; h++) { + if (info.check_hash) { + objInfo.hash[h] = GPU::calculate_hash(reinterpret_cast(virtual_addr[h]), size[h]); + } else { + objInfo.hash[h] = 0; + } + } + objInfo.submit_id = submit_id; + objInfo.check_hash = info.check_hash; + + objInfo.gpu_object.obj = info.getCreateFunc()(ctx, objInfo.obj_params, virtual_addr, size, virtual_addr_num, &objInfo.mem); + + objInfo.update_func = info.getUpdateFunc(); + int index = static_cast(heap.objects.size()); + + HeapObject hobj{}; + hobj.block = createHeapBlock(virtual_addr, size, virtual_addr_num, heap_id, index); + hobj.info = objInfo; + hobj.free = false; + heap.objects.push_back(hobj); + + return objInfo.gpu_object.obj; +} + +GPU::HeapBlock GPU::GPUMemory::createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id) { + auto& heap = m_heaps[heap_id]; + + GPU::HeapBlock heapBlock{}; + heapBlock.virtual_addr_num = virtual_addr_num; + for (int vi = 0; vi < virtual_addr_num; vi++) { + heapBlock.virtual_addr[vi] = virtual_addr[vi]; + heapBlock.size[vi] = size[vi]; + } + return heapBlock; +} + +void GPU::GPUMemory::update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id) { + auto& heap = m_heaps[heap_id]; + + auto& heapObj = heap.objects[obj_id]; + auto& objInfo = heapObj.info; + bool need_update = false; + + if (submit_id > objInfo.submit_id) { + uint64_t hash[3] = {}; + + for (int i = 0; i < heapObj.block.virtual_addr_num; i++) { + if (objInfo.check_hash) { + hash[i] = GPU::calculate_hash(reinterpret_cast(heapObj.block.virtual_addr[i]), heapObj.block.size[i]); + } else { + hash[i] = 0; + } + } + + for (int i = 0; i < heapObj.block.virtual_addr_num; i++) { + if (objInfo.hash[i] != hash[i]) { + need_update = true; + objInfo.hash[i] = hash[i]; + } + } + + if (submit_id != UINT64_MAX) { + objInfo.submit_id = submit_id; + } + } + + if (need_update) { + objInfo.update_func(ctx, objInfo.obj_params, objInfo.gpu_object.obj, heapObj.block.virtual_addr, heapObj.block.size, + heapObj.block.virtual_addr_num); + } +} + +void GPU::GPUMemory::flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx) { + std::scoped_lock lock{m_mutex}; + + int heap_id = 0; + for (auto& heap : m_heaps) { + int index = 0; + for (auto& heapObj : heap.objects) { + if (!heapObj.free) { + update(UINT64_MAX, ctx, heap_id, index); + } + index++; + } + heap_id++; + } +} diff --git a/src/video_core/gpu_memory.h b/src/video_core/gpu_memory.h new file mode 100644 index 00000000..87095ec7 --- /dev/null +++ b/src/video_core/gpu_memory.h @@ -0,0 +1,86 @@ +#pragma once + +#include "common/types.h" +#include +#include + +namespace VideoCore { + +class GPUObject; + +enum class MemoryMode : u32 { + NoAccess = 0, + Read = 1, + Write = 2, + ReadWrite = 3, +}; + +enum class MemoryObjectType : u64 { + Invalid, + VideoOutBuffer, +}; + +struct GpuMemoryObject { + MemoryObjectType object_type = MemoryObjectType::Invalid; + void* obj = nullptr; +}; + +struct HeapBlock { + std::array virtual_address{}; + std::array size{}; + u32 virtual_addr_num = 0; +}; + +class GPUObject { +public: + GPUObject() = default; + virtual ~GPUObject() = default; + u64 obj_params[8] = {}; + bool check_hash = false; + bool isReadOnly = false; + MemoryObjectType objectType = MemoryObjectType::Invalid; +}; + +struct ObjInfo { + std::array obj_params{}; + GpuMemoryObject gpu_object; + std::array hash{}; + u64 submit_id = 0; + bool check_hash = false; +}; + +struct HeapObject { + HeapBlock block; + ObjInfo info; + bool free = true; +}; +struct MemoryHeap { + u64 allocated_virtual_addr = 0; + u64 allocated_size = 0; + std::vector objects; +}; + +class GPUMemory { +public: + GPUMemory() {} + virtual ~GPUMemory() {} + int getHeapId(u64 vaddr, u64 size); + void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, const u64* virtual_addr, + const u64* size, int virtual_addr_num, const GPUObject& info); + HeapBlock createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id); + void update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id); + void flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx); + +private: + std::mutex m_mutex; + std::vector m_heaps; +}; + +void memorySetAllocArea(u64 virtual_addr, u64 size); +void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, u64 virtual_addr, u64 size, + const GPUObject& info); +u64 calculate_hash(const u08* buf, u64 size); +bool vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem); +void flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx); + +} // namespace VideoCore diff --git a/src/video_core/tile_manager.cpp b/src/video_core/tile_manager.cpp new file mode 100644 index 00000000..c0d4ed3b --- /dev/null +++ b/src/video_core/tile_manager.cpp @@ -0,0 +1,151 @@ +#include +#include +#include "video_core/tile_manager.h" + +namespace VideoCore { + +class TileManager32 { + public: + u32 m_macro_tile_height = 0; + u32 m_bank_height = 0; + u32 m_num_banks = 0; + u32 m_num_pipes = 0; + u32 m_padded_width = 0; + u32 m_padded_height = 0; + u32 m_pipe_bits = 0; + u32 m_bank_bits = 0; + + TileManager32(u32 width, u32 height, bool is_neo) { + m_macro_tile_height = (is_neo ? 128 : 64); + m_bank_height = is_neo ? 2 : 1; + m_num_banks = is_neo ? 8 : 16; + m_num_pipes = is_neo ? 16 : 8; + m_padded_width = width; + if (height == 1080) { + m_padded_height = is_neo ? 1152 : 1088; + } + if (height == 720) { + m_padded_height = 768; + } + m_pipe_bits = is_neo ? 4 : 3; + m_bank_bits = is_neo ? 3 : 4; + } + + static u32 GetElementIndex(u32 x, u32 y) { + u32 elem = 0; + elem |= ((x >> 0u) & 0x1u) << 0u; + elem |= ((x >> 1u) & 0x1u) << 1u; + elem |= ((y >> 0u) & 0x1u) << 2u; + elem |= ((x >> 2u) & 0x1u) << 3u; + elem |= ((y >> 1u) & 0x1u) << 4u; + elem |= ((y >> 2u) & 0x1u) << 5u; + + return elem; + } + + static u32 GetPipeIndex(u32 x, u32 y, bool is_neo) { + u32 pipe = 0; + + if (!is_neo) { + pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u; + pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u; + pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u; + } else { + pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u; + pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u; + pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u; + pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u; + } + + return pipe; + } + + static u32 GetBankIndex(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) { + const u32 x_shift_offset = std::bit_width(bank_width * num_pipes); + const u32 y_shift_offset = std::bit_width(bank_height); + const u32 xs = x >> x_shift_offset; + const u32 ys = y >> y_shift_offset; + u32 bank = 0; + switch (num_banks) { + case 8: + bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u; + bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u; + bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u; + break; + case 16: + bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u; + bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u; + bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u; + bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u; + break; + default:; + } + + return bank; + } + + u64 GetTiledOffset(u32 x, u32 y, bool is_neo) const { + u64 element_index = GetElementIndex(x, y); + + u32 xh = x; + u32 yh = y; + u64 pipe = GetPipeIndex(xh, yh, is_neo); + u64 bank = GetBankIndex(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes); + u32 tile_bytes = (8 * 8 * 32 + 7) / 8; + u64 element_offset = (element_index * 32); + u64 tile_split_slice = 0; + + if (tile_bytes > 512) { + tile_split_slice = element_offset / (static_cast(512) * 8); + element_offset %= (static_cast(512) * 8); + tile_bytes = 512; + } + + u64 macro_tile_bytes = (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks); + u64 macro_tiles_per_row = m_padded_width / 128; + u64 macro_tile_row_index = y / m_macro_tile_height; + u64 macro_tile_column_index = x / 128; + u64 macro_tile_index = (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index; + u64 macro_tile_offset = macro_tile_index * macro_tile_bytes; + u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height); + u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes; + u64 slice_offset = tile_split_slice * slice_bytes; + u64 tile_row_index = (y / 8) % m_bank_height; + u64 tile_index = tile_row_index; + u64 tile_offset = tile_index * tile_bytes; + + u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice; + bank ^= tile_split_slice_rotation; + bank &= (m_num_banks - 1); + + u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset; + u64 bit_offset = total_offset & 0x7u; + total_offset /= 8; + + u64 pipe_interleave_offset = total_offset & 0xffu; + u64 offset = total_offset >> 8u; + u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | (offset << (8u + m_pipe_bits + m_bank_bits)); + + return ((byte_offset << 3u) | bit_offset) / 8; + } +}; + +void ConvertTileToLinear(u08* dst, const u08* src,u32 width, u32 height, bool is_neo) { + const TileManager32 t{width, height, is_neo}; + for (u32 y = 0; y < height; y++) { + u32 x = 0; + u64 linear_offset = y * width * 4; + + for (; x + 1 < width; x += 2) { + auto tiled_offset = t.GetTiledOffset(x, y, is_neo); + std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64)); + linear_offset += sizeof(u64); + } + if (x < width) { + auto tiled_offset = t.GetTiledOffset(x, y, is_neo); + std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32)); + } + } +} + +} // namespace VideoCore diff --git a/src/video_core/tile_manager.h b/src/video_core/tile_manager.h new file mode 100644 index 00000000..b58b99c9 --- /dev/null +++ b/src/video_core/tile_manager.h @@ -0,0 +1,9 @@ +#pragma once + +#include "common/types.h" + +namespace VideoCore { + +void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool neo); + +} // namespace VideoCore diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt index 0e04aae4..d9bbe697 100644 --- a/third-party/CMakeLists.txt +++ b/third-party/CMakeLists.txt @@ -30,7 +30,6 @@ add_subdirectory(toml11 EXCLUDE_FROM_ALL) # Vulkan add_subdirectory(vulkan EXCLUDE_FROM_ALL) -target_include_directories(vulkan-1 INTERFACE vulkan/include) # Winpthreads if (WIN32) @@ -66,6 +65,6 @@ target_include_directories(imgui PUBLIC imgui/include ) -target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis winpthread discord-rpc) +target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis discord-rpc) diff --git a/third-party/fmt b/third-party/fmt index 661b23ed..8e42eef4 160000 --- a/third-party/fmt +++ b/third-party/fmt @@ -1 +1 @@ -Subproject commit 661b23edeb52d400cf5812e7330f14f05c072fab +Subproject commit 8e42eef4950feb5d2b76574a9cd2591dfaae2449 diff --git a/third-party/spdlog b/third-party/spdlog index 76dfc7e7..134f9194 160000 --- a/third-party/spdlog +++ b/third-party/spdlog @@ -1 +1 @@ -Subproject commit 76dfc7e7c0d3c69d3cdaa3399b63545235ccbb02 +Subproject commit 134f9194bb93072b72b8cfa27ac3bb30a0fb5b57 diff --git a/third-party/vulkan b/third-party/vulkan index 8c9feb4f..72b2e740 160000 --- a/third-party/vulkan +++ b/third-party/vulkan @@ -1 +1 @@ -Subproject commit 8c9feb4f480b32f7c7421af546aa6ffb558bdd5e +Subproject commit 72b2e740754bc6b86b724fa5b2c90dca6f69462e