From 1b9bf924ca1778ac97021c26818904219565b872 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 1 May 2024 13:38:41 +0300 Subject: [PATCH] core: Rewrite thread local storage implementation (#118) --- .gitmodules | 3 + CMakeLists.txt | 66 +++--- externals/CMakeLists.txt | 5 +- externals/xbyak | 1 + src/common/logging/backend.cpp | 32 +-- src/core/linker.cpp | 28 ++- src/core/tls.cpp | 194 ++++++++---------- src/core/tls.h | 12 +- src/main.cpp | 1 - .../texture_cache/texture_cache.cpp | 15 +- src/video_core/texture_cache/texture_cache.h | 8 +- 11 files changed, 176 insertions(+), 189 deletions(-) create mode 160000 externals/xbyak diff --git a/.gitmodules b/.gitmodules index 422be4ee..ff226e83 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "externals/boost"] path = externals/boost url = https://github.com/raphaelthegreat/ext-boost +[submodule "externals/xbyak"] + path = externals/xbyak + url = https://github.com/herumi/xbyak diff --git a/CMakeLists.txt b/CMakeLists.txt index d16d2165..670190cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -341,46 +341,46 @@ set(QT_GUI ) endif() -if(ENABLE_QT_GUI) -qt_add_executable(shadps4 - ${AUDIO_CORE} - ${INPUT} - ${QT_GUI} - ${COMMON} - ${CORE} - ${VIDEO_CORE} - src/sdl_window.h - src/sdl_window.cpp -) +if (ENABLE_QT_GUI) + qt_add_executable(shadps4 + ${AUDIO_CORE} + ${INPUT} + ${QT_GUI} + ${COMMON} + ${CORE} + ${VIDEO_CORE} + src/sdl_window.h + src/sdl_window.cpp + ) else() -add_executable(shadps4 - ${AUDIO_CORE} - ${INPUT} - ${COMMON} - ${CORE} - ${VIDEO_CORE} - src/main.cpp - src/sdl_window.h - src/sdl_window.cpp -) + add_executable(shadps4 + ${AUDIO_CORE} + ${INPUT} + ${COMMON} + ${CORE} + ${VIDEO_CORE} + src/main.cpp + src/sdl_window.h + src/sdl_window.cpp + ) endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak) target_link_libraries(shadps4 PRIVATE discord-rpc boost vma vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared) -if(NOT ENABLE_QT_GUI) +if (NOT ENABLE_QT_GUI) target_link_libraries(shadps4 PRIVATE SDL3-shared) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND MSVC) +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND MSVC) target_link_libraries(shadps4 PRIVATE cryptoppwin zlib) else() target_link_libraries(shadps4 PRIVATE cryptopp::cryptopp zlib) endif() -if(ENABLE_QT_GUI) +if (ENABLE_QT_GUI) target_link_libraries(shadps4 PRIVATE Qt6::Widgets Qt6::Concurrent) endif() @@ -388,22 +388,24 @@ if (WIN32) target_link_libraries(shadps4 PRIVATE mincore winpthread clang_rt.builtins-x86_64.lib) add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS) add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN) - add_definitions(-D_TIMESPEC_DEFINED) #needed for conflicts with time.h of windows.h + if (MSVC) + # Needed for conflicts with time.h of windows.h + add_definitions(-D_TIMESPEC_DEFINED) + endif() # Target Windows 10 RS5 add_definitions(-DNTDDI_VERSION=0x0A000006 -D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00) endif() -if(WIN32) +if (WIN32) target_sources(shadps4 PRIVATE src/shadps4.rc) endif() target_include_directories(shadps4 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) -if(ENABLE_QT_GUI) -set_target_properties(shadps4 PROPERTIES - WIN32_EXECUTABLE ON - MACOSX_BUNDLE ON -) +if (ENABLE_QT_GUI) + set_target_properties(shadps4 PROPERTIES + WIN32_EXECUTABLE ON + MACOSX_BUNDLE ON) endif() add_custom_command(TARGET shadps4 POST_BUILD diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 3334ccb8..f59aa645 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -62,4 +62,7 @@ set(ENABLE_OPT OFF CACHE BOOL "") add_subdirectory(glslang) # Robin-map -add_subdirectory(robin-map) +add_subdirectory(robin-map EXCLUDE_FROM_ALL) + +# Xbyak +add_subdirectory(xbyak EXCLUDE_FROM_ALL) diff --git a/externals/xbyak b/externals/xbyak new file mode 160000 index 00000000..80477f63 --- /dev/null +++ b/externals/xbyak @@ -0,0 +1 @@ +Subproject commit 80477f635345e8f13efc512d84b01b94cad92cd9 diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 68bbcdb8..0d75b331 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -175,28 +175,18 @@ public: using std::chrono::microseconds; using std::chrono::steady_clock; + const Entry entry = { + .timestamp = duration_cast(steady_clock::now() - time_origin), + .log_class = log_class, + .log_level = log_level, + .filename = filename, + .line_num = line_num, + .function = function, + .message = std::move(message), + }; if (Config::getLogType() == "async") { - - message_queue.EmplaceWait(Entry{ - .timestamp = duration_cast(steady_clock::now() - time_origin), - .log_class = log_class, - .log_level = log_level, - .filename = filename, - .line_num = line_num, - .function = function, - .message = std::move(message), - }); + message_queue.EmplaceWait(entry); } else { - - const Entry entry = { - .timestamp = duration_cast(steady_clock::now() - time_origin), - .log_class = log_class, - .log_level = log_level, - .filename = filename, - .line_num = line_num, - .function = function, - .message = std::move(message), - }; ForEachBackend([&entry](auto& backend) { backend.Write(entry); }); } } @@ -239,7 +229,7 @@ private: } void ForEachBackend(auto lambda) { - lambda(debugger_backend); + // lambda(debugger_backend); lambda(color_console_backend); lambda(file_backend); } diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 0c068cd9..316bcdd2 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "common/config.h" #include "common/logging/log.h" #include "common/path_util.h" @@ -94,12 +95,17 @@ void Linker::LoadModuleToMemory(Module* m) { 0x1000; // align base size to 0x1000 block size (TODO is that the default // block size or it can be changed? - m->base_virtual_addr = VirtualMemory::memory_alloc(LoadAddress, m->aligned_base_size, - VirtualMemory::MemoryMode::ExecuteReadWrite); + static constexpr u64 TrampolineSize = 8_MB; + m->base_virtual_addr = + VirtualMemory::memory_alloc(LoadAddress, m->aligned_base_size + TrampolineSize, + VirtualMemory::MemoryMode::ExecuteReadWrite); LoadAddress += CODE_BASE_INCR * (1 + m->aligned_base_size / CODE_BASE_INCR); - LOG_INFO(Core_Linker, "====Load Module to Memory ========"); + void* trampoline_addr = reinterpret_cast(m->base_virtual_addr + m->aligned_base_size); + Xbyak::CodeGenerator c(TrampolineSize, trampoline_addr); + + LOG_INFO(Core_Linker, "======== Load Module to Memory ========"); LOG_INFO(Core_Linker, "base_virtual_addr ......: {:#018x}", m->base_virtual_addr); LOG_INFO(Core_Linker, "base_size ..............: {:#018x}", base_size); LOG_INFO(Core_Linker, "aligned_base_size ......: {:#018x}", m->aligned_base_size); @@ -123,7 +129,7 @@ void Linker::LoadModuleToMemory(Module* m) { m->elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, segment_file_size); if (elf_pheader[i].p_flags & PF_EXEC) { - PatchTLS(segment_addr, segment_file_size); + PatchTLS(segment_addr, segment_file_size, c); } } else { LOG_ERROR(Core_Linker, "p_memsz==0 in type {}", @@ -153,8 +159,8 @@ void Linker::LoadModuleToMemory(Module* m) { case PT_TLS: m->tls.image_virtual_addr = elf_pheader[i].p_vaddr + m->base_virtual_addr; m->tls.image_size = GetAlignedSize(elf_pheader[i]); - LOG_INFO(Core_Linker, "tls virtual address ={:#x}", m->tls.image_virtual_addr); - LOG_INFO(Core_Linker, "tls image size ={}", m->tls.image_size); + LOG_INFO(Core_Linker, "TLS virtual address = {:#x}", m->tls.image_virtual_addr); + LOG_INFO(Core_Linker, "TLS image size = {}", m->tls.image_size); break; case PT_SCE_PROCPARAM: m->proc_param_virtual_addr = elf_pheader[i].p_vaddr + m->base_virtual_addr; @@ -662,7 +668,7 @@ static void RunMainEntry(u64 addr, EntryParams* params, exit_func_t exit_func) { // there's no coming back : : "r"(addr), "r"(params), "r"(exit_func) - : "rax", "rsi", "rdi", "rsp"); + : "rax", "rsi", "rdi"); } void Linker::Execute() { @@ -681,9 +687,13 @@ void Linker::Execute() { p.argv[0] = "eboot.bin"; // hmm should be ok? for (auto& m : m_modules) { - if (!m->elf.IsSharedLib()) { - RunMainEntry(m->elf.GetElfEntry() + m->base_virtual_addr, &p, ProgramExitFunc); + if (m->elf.IsSharedLib()) { + continue; } + if (m->tls.image_virtual_addr != 0) { + SetTLSStorage(m->tls.image_virtual_addr); + } + RunMainEntry(m->elf.GetElfEntry() + m->base_virtual_addr, &p, ProgramExitFunc); } } diff --git a/src/core/tls.cpp b/src/core/tls.cpp index 6291d1ab..b945baef 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/assert.h" #include "common/types.h" #include "core/tls.h" @@ -11,25 +12,18 @@ namespace Core { -thread_local u8 TLS[1024]; - struct TLSPattern { - uint8_t pattern[5]; - uint8_t pattern_size; - uint8_t imm_size; - uint8_t target_reg; + u8 pattern[5]; + u8 pattern_size; + u8 imm_size; + u8 target_reg; }; constexpr static TLSPattern TlsPatterns[] = { - {{0x64, 0x48, 0xA1}, - 3, - 8, - 0}, // 64 48 A1 | 00 00 00 00 00 00 00 00 # mov rax, qword ptr fs:[64b imm] - - {{0x64, 0x48, 0x8B, 0x4, 0x25}, - 5, - 4, - 0}, // 64 48 8B 04 25 | 00 00 00 00 # mov rax,qword ptr fs:[0] + // 64 48 A1 | 00 00 00 00 00 00 00 00 # mov rax, qword ptr fs:[64b imm] + {{0x64, 0x48, 0xA1}, 3, 8, 0}, + // 64 48 8B 04 25 | 00 00 00 00 # mov rax,qword ptr fs:[0] + {{0x64, 0x48, 0x8B, 0x4, 0x25}, 5, 4, 0}, // rax {{0x64, 0x48, 0x8B, 0xC, 0x25}, 5, 4, 1}, // rcx {{0x64, 0x48, 0x8B, 0x14, 0x25}, 5, 4, 2}, // rdx {{0x64, 0x48, 0x8B, 0x1C, 0x25}, 5, 4, 3}, // rbx @@ -47,103 +41,28 @@ constexpr static TLSPattern TlsPatterns[] = { {{0x64, 0x4C, 0x8B, 0x3C, 0x25}, 5, 4, 15}, // r15 }; -uintptr_t GetGuestTls(s64 tls_offset) { - if (tls_offset == 0) { - return reinterpret_cast(TLS); - } - UNREACHABLE_MSG("Unimplemented offset info tls"); +#ifdef _WIN32 +static DWORD slot = 0; + +void SetTLSStorage(u64 image_address) { + // Guest apps will use both positive and negative offsets to the TLS pointer. + // User data at probably in negative offsets, while pthread data at positive offset. + const BOOL result = TlsSetValue(slot, reinterpret_cast(image_address)); + ASSERT(result != 0); } -#ifdef _WIN64 -static LONG WINAPI ExceptionHandler(PEXCEPTION_POINTERS pExp) noexcept { - auto orig_rip = pExp->ContextRecord->Rip; - while (*(u8*)pExp->ContextRecord->Rip == 0x66) { - pExp->ContextRecord->Rip++; - } +void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) { + using namespace Xbyak::util; - if (*(u8*)pExp->ContextRecord->Rip == 0xcd) { - int reg = *(u8*)(pExp->ContextRecord->Rip + 1) - 0x80; - int sizes = *(u8*)(pExp->ContextRecord->Rip + 2); - int pattern_size = sizes & 0xF; - int imm_size = sizes >> 4; - - int64_t tls_offset; - if (imm_size == 4) { - tls_offset = *(s32*)(pExp->ContextRecord->Rip + pattern_size); - } else { - tls_offset = *(s64*)(pExp->ContextRecord->Rip + pattern_size); - } - - (&pExp->ContextRecord->Rax)[reg] = GetGuestTls(tls_offset); /* GetGuestTls */ - pExp->ContextRecord->Rip += pattern_size + imm_size; - - return EXCEPTION_CONTINUE_EXECUTION; - } - - pExp->ContextRecord->Rip = orig_rip; - const u32 ec = pExp->ExceptionRecord->ExceptionCode; - switch (ec) { - case EXCEPTION_ACCESS_VIOLATION: { - LOG_CRITICAL(Core, "Exception EXCEPTION_ACCESS_VIOLATION ({:#x})", ec); - const auto info = pExp->ExceptionRecord->ExceptionInformation; - switch (info[0]) { - case 0: - LOG_CRITICAL(Core, "Read violation at address {:#x}", info[1]); - break; - case 1: - LOG_CRITICAL(Core, "Write violation at address {:#x}", info[1]); - break; - case 8: - LOG_CRITICAL(Core, "DEP violation at address {:#x}", info[1]); - break; - default: - break; - } - break; - } - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: - LOG_CRITICAL(Core, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED ({:#x})", ec); - break; - case EXCEPTION_DATATYPE_MISALIGNMENT: - LOG_CRITICAL(Core, "Exception EXCEPTION_DATATYPE_MISALIGNMENT ({:#x})", ec); - break; - case EXCEPTION_FLT_DIVIDE_BY_ZERO: - LOG_CRITICAL(Core, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO ({:#x})", ec); - break; - case EXCEPTION_ILLEGAL_INSTRUCTION: - LOG_CRITICAL(Core, "Exception EXCEPTION_ILLEGAL_INSTRUCTION ({:#x})", ec); - break; - case EXCEPTION_IN_PAGE_ERROR: - LOG_CRITICAL(Core, "Exception EXCEPTION_IN_PAGE_ERROR ({:#x})", ec); - break; - case EXCEPTION_INT_DIVIDE_BY_ZERO: - LOG_CRITICAL(Core, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO ({:#x})", ec); - break; - case EXCEPTION_PRIV_INSTRUCTION: - LOG_CRITICAL(Core, "Exception EXCEPTION_PRIV_INSTRUCTION ({:#x})", ec); - break; - case EXCEPTION_STACK_OVERFLOW: - LOG_CRITICAL(Core, "Exception EXCEPTION_STACK_OVERFLOW ({:#x})", ec); - break; - default: - return EXCEPTION_CONTINUE_SEARCH; - } - return EXCEPTION_CONTINUE_SEARCH; -} -#endif - -void InstallTlsHandler() { -#ifdef _WIN64 - if (!AddVectoredExceptionHandler(0, ExceptionHandler)) { - LOG_CRITICAL(Core, "Failed to register an exception handler"); - } -#endif -} - -void PatchTLS(u64 segment_addr, u64 segment_size) { u8* code = reinterpret_cast(segment_addr); auto remaining_size = segment_size; + // Sometimes loads from the FS segment are prefixed with useless operand size prefix bytes like: + // |66 66 66| 64 48 8b 04 25 00 # mov rax, qword ptr fs:[0x0] + // These are probably ignored by the processor but when patching the instruction to a jump + // they cause issues. So look for them and patch them to nop to avoid problems. + static constexpr std::array BadPrefix = {0x66, 0x66, 0x66}; + while (remaining_size) { for (const auto& tls_pattern : TlsPatterns) { const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size; @@ -153,18 +72,55 @@ void PatchTLS(u64 segment_addr, u64 segment_size) { if (std::memcmp(code, tls_pattern.pattern, tls_pattern.pattern_size) != 0) { continue; } + u64 offset = 0; if (tls_pattern.imm_size == 4) { + std::memcpy(&offset, code + tls_pattern.pattern_size, sizeof(u32)); LOG_INFO(Core_Linker, "PATTERN32 FOUND at {}, reg: {} offset: {:#x}", - fmt::ptr(code), tls_pattern.target_reg, - *(u32*)(code + tls_pattern.pattern_size)); + fmt::ptr(code), tls_pattern.target_reg, offset); } else { + std::memcpy(&offset, code + tls_pattern.pattern_size, sizeof(u64)); LOG_INFO(Core_Linker, "PATTERN64 FOUND at {}, reg: {} offset: {:#x}", - fmt::ptr(code), tls_pattern.target_reg, - *(u32*)(code + tls_pattern.pattern_size)); + fmt::ptr(code), tls_pattern.target_reg, offset); } - code[0] = 0xcd; - code[1] = 0x80 + tls_pattern.target_reg; - code[2] = tls_pattern.pattern_size | (tls_pattern.imm_size << 4); + ASSERT(offset == 0); + + // Allocate slot in the process if not done already. + if (slot == 0) { + slot = TlsAlloc(); + } + + // Replace bogus instruction prefix with nops if it exists. + if (std::memcmp(code - BadPrefix.size(), BadPrefix.data(), sizeof(BadPrefix)) == 0) { + auto patch = Xbyak::CodeGenerator(BadPrefix.size(), code - BadPrefix.size()); + patch.nop(BadPrefix.size()); + } + + // Replace mov instruction with near jump to the trampoline. + static constexpr u32 NearJmpSize = 5; + auto patch = Xbyak::CodeGenerator(total_size, code); + patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR); + patch.nop(total_size - NearJmpSize); + + // Write the trampoline. + // The following logic is based on the wine implementation of TlsGetValue + // https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719 + static constexpr u32 TlsSlotsOffset = 0x1480; + static constexpr u32 TlsExpansionSlotsOffset = 0x1780; + static constexpr u32 TlsMinimumAvailable = 64; + const u32 teb_offset = + slot < TlsMinimumAvailable ? TlsSlotsOffset : TlsExpansionSlotsOffset; + const u32 tls_index = slot < TlsMinimumAvailable ? slot : slot - TlsMinimumAvailable; + + const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg); + c.mov(target_reg, teb_offset); + c.putSeg(gs); + c.mov(target_reg, ptr[target_reg]); // Load the pointer to the table of tls slots. + c.mov( + target_reg, + qword[target_reg + tls_index * sizeof(LPVOID)]); // Load the pointer to our buffer. + c.jmp(code + total_size); // Return to the instruction right after the mov. + + // Move ahead in module. code += total_size - 1; remaining_size -= total_size - 1; break; @@ -174,4 +130,16 @@ void PatchTLS(u64 segment_addr, u64 segment_size) { } } +#else + +void SetTLSStorage(u64 image_address) { + UNREACHABLE_MSG("Thread local storage is unimplemented on posix platforms!"); +} + +void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) { + UNREACHABLE_MSG("Thread local storage is unimplemented on posix platforms!"); +} + +#endif + } // namespace Core diff --git a/src/core/tls.h b/src/core/tls.h index aa001178..e9825bf6 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -5,12 +5,16 @@ #include "common/types.h" +namespace Xbyak { +class CodeGenerator; +} + namespace Core { -/// Installs a host exception handler to handle guest TLS access. -void InstallTlsHandler(); +/// Sets the data pointer that contains the TLS image. +void SetTLSStorage(u64 image_address); -/// Patches any instructions that access TLS to trigger the exception handler. -void PatchTLS(u64 segment_addr, u64 segment_size); +/// Patches any instructions that access guest TLS to use provided storage. +void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c); } // namespace Core diff --git a/src/main.cpp b/src/main.cpp index 8898ccd4..967bd227 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -50,7 +50,6 @@ int main(int argc, char* argv[]) { auto linker = Common::Singleton::Instance(); Libraries::InitHLELibs(&linker->getHLESymbols()); - Core::InstallTlsHandler(); linker->LoadModule(path); // Check if there is a libc.prx in sce_module folder diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 181a5c78..8119ee4c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,9 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/assert.h" #include "common/config.h" #include "core/libraries/videoout/buffer.h" +#include "core/virtual_memory.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" @@ -15,7 +17,7 @@ #define PAGE_NOACCESS PROT_NONE #define PAGE_READWRITE (PROT_READ | PROT_WRITE) #else -#include +#include void mprotect(void* addr, size_t len, int prot) { DWORD old_prot{}; @@ -57,6 +59,7 @@ LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept { #endif static constexpr u64 StreamBufferSize = 128_MB; +static constexpr u64 PageShift = 12; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_) : instance{instance_}, scheduler{scheduler_}, @@ -76,7 +79,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& guest_access_fault.sa_mask = signal_mask; sigaction(SIGSEGV, &guest_access_fault, nullptr); #else - veh_handle = AddVectoredExceptionHandler(1, GuestFaultSignalHandler); + veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler); ASSERT_MSG(veh_handle, "Failed to register an exception handler"); #endif g_texture_cache = this; @@ -243,8 +246,8 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) { } void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { - const u64 num_pages = ((addr + size - 1) >> PageBits) - (addr >> PageBits) + 1; - const u64 page_start = addr >> PageBits; + const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; + const u64 page_start = addr >> PageShift; const u64 page_end = page_start + num_pages; const auto pages_interval = @@ -256,8 +259,8 @@ void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { const auto& range = cached_pages.equal_range(pages_interval); for (const auto& [range, count] : boost::make_iterator_range(range)) { const auto interval = range & pages_interval; - const VAddr interval_start_addr = boost::icl::first(interval) << PageBits; - const VAddr interval_end_addr = boost::icl::last_next(interval) << PageBits; + const VAddr interval_start_addr = boost::icl::first(interval) << PageShift; + const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift; const u32 interval_size = interval_end_addr - interval_start_addr; void* addr = reinterpret_cast(interval_start_addr); if (delta > 0 && count == delta) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 472ff04f..bdc565b2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -3,7 +3,6 @@ #pragma once -#include #include #include #include @@ -19,7 +18,12 @@ struct BufferAttributeGroup; namespace VideoCore { class TextureCache { - static constexpr u64 PageBits = 14; + // This is the page shift for adding images into the hash map. It isn't related to + // the page size of the guest or the host and is chosen for convenience. A number too + // small will increase the number of hash map lookups per image, while too large will + // increase the number of images per page. + static constexpr u64 PageBits = 20; + static constexpr u64 PageMask = (1ULL << PageBits) - 1; public: explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);