core: Rewrite thread local storage implementation (#118)
This commit is contained in:
parent
b94efcba5a
commit
1b9bf924ca
|
@ -61,3 +61,6 @@
|
||||||
[submodule "externals/boost"]
|
[submodule "externals/boost"]
|
||||||
path = externals/boost
|
path = externals/boost
|
||||||
url = https://github.com/raphaelthegreat/ext-boost
|
url = https://github.com/raphaelthegreat/ext-boost
|
||||||
|
[submodule "externals/xbyak"]
|
||||||
|
path = externals/xbyak
|
||||||
|
url = https://github.com/herumi/xbyak
|
||||||
|
|
|
@ -367,7 +367,7 @@ endif()
|
||||||
|
|
||||||
create_target_directory_groups(shadps4)
|
create_target_directory_groups(shadps4)
|
||||||
|
|
||||||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map)
|
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak)
|
||||||
target_link_libraries(shadps4 PRIVATE discord-rpc boost vma vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared)
|
target_link_libraries(shadps4 PRIVATE discord-rpc boost vma vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared)
|
||||||
|
|
||||||
if (NOT ENABLE_QT_GUI)
|
if (NOT ENABLE_QT_GUI)
|
||||||
|
@ -388,7 +388,10 @@ if (WIN32)
|
||||||
target_link_libraries(shadps4 PRIVATE mincore winpthread clang_rt.builtins-x86_64.lib)
|
target_link_libraries(shadps4 PRIVATE mincore winpthread clang_rt.builtins-x86_64.lib)
|
||||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
|
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
|
||||||
add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN)
|
add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN)
|
||||||
add_definitions(-D_TIMESPEC_DEFINED) #needed for conflicts with time.h of windows.h
|
if (MSVC)
|
||||||
|
# Needed for conflicts with time.h of windows.h
|
||||||
|
add_definitions(-D_TIMESPEC_DEFINED)
|
||||||
|
endif()
|
||||||
# Target Windows 10 RS5
|
# Target Windows 10 RS5
|
||||||
add_definitions(-DNTDDI_VERSION=0x0A000006 -D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00)
|
add_definitions(-DNTDDI_VERSION=0x0A000006 -D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00)
|
||||||
endif()
|
endif()
|
||||||
|
@ -402,8 +405,7 @@ target_include_directories(shadps4 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
if (ENABLE_QT_GUI)
|
if (ENABLE_QT_GUI)
|
||||||
set_target_properties(shadps4 PROPERTIES
|
set_target_properties(shadps4 PROPERTIES
|
||||||
WIN32_EXECUTABLE ON
|
WIN32_EXECUTABLE ON
|
||||||
MACOSX_BUNDLE ON
|
MACOSX_BUNDLE ON)
|
||||||
)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_custom_command(TARGET shadps4 POST_BUILD
|
add_custom_command(TARGET shadps4 POST_BUILD
|
||||||
|
|
|
@ -62,4 +62,7 @@ set(ENABLE_OPT OFF CACHE BOOL "")
|
||||||
add_subdirectory(glslang)
|
add_subdirectory(glslang)
|
||||||
|
|
||||||
# Robin-map
|
# Robin-map
|
||||||
add_subdirectory(robin-map)
|
add_subdirectory(robin-map EXCLUDE_FROM_ALL)
|
||||||
|
|
||||||
|
# Xbyak
|
||||||
|
add_subdirectory(xbyak EXCLUDE_FROM_ALL)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 80477f635345e8f13efc512d84b01b94cad92cd9
|
|
@ -175,19 +175,6 @@ public:
|
||||||
using std::chrono::microseconds;
|
using std::chrono::microseconds;
|
||||||
using std::chrono::steady_clock;
|
using std::chrono::steady_clock;
|
||||||
|
|
||||||
if (Config::getLogType() == "async") {
|
|
||||||
|
|
||||||
message_queue.EmplaceWait(Entry{
|
|
||||||
.timestamp = duration_cast<microseconds>(steady_clock::now() - time_origin),
|
|
||||||
.log_class = log_class,
|
|
||||||
.log_level = log_level,
|
|
||||||
.filename = filename,
|
|
||||||
.line_num = line_num,
|
|
||||||
.function = function,
|
|
||||||
.message = std::move(message),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
|
|
||||||
const Entry entry = {
|
const Entry entry = {
|
||||||
.timestamp = duration_cast<microseconds>(steady_clock::now() - time_origin),
|
.timestamp = duration_cast<microseconds>(steady_clock::now() - time_origin),
|
||||||
.log_class = log_class,
|
.log_class = log_class,
|
||||||
|
@ -197,6 +184,9 @@ public:
|
||||||
.function = function,
|
.function = function,
|
||||||
.message = std::move(message),
|
.message = std::move(message),
|
||||||
};
|
};
|
||||||
|
if (Config::getLogType() == "async") {
|
||||||
|
message_queue.EmplaceWait(entry);
|
||||||
|
} else {
|
||||||
ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
|
ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -239,7 +229,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void ForEachBackend(auto lambda) {
|
void ForEachBackend(auto lambda) {
|
||||||
lambda(debugger_backend);
|
// lambda(debugger_backend);
|
||||||
lambda(color_console_backend);
|
lambda(color_console_backend);
|
||||||
lambda(file_backend);
|
lambda(file_backend);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include <Zydis/Zydis.h>
|
#include <Zydis/Zydis.h>
|
||||||
#include <common/assert.h>
|
#include <common/assert.h>
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
|
@ -94,12 +95,17 @@ void Linker::LoadModuleToMemory(Module* m) {
|
||||||
0x1000; // align base size to 0x1000 block size (TODO is that the default
|
0x1000; // align base size to 0x1000 block size (TODO is that the default
|
||||||
// block size or it can be changed?
|
// block size or it can be changed?
|
||||||
|
|
||||||
m->base_virtual_addr = VirtualMemory::memory_alloc(LoadAddress, m->aligned_base_size,
|
static constexpr u64 TrampolineSize = 8_MB;
|
||||||
|
m->base_virtual_addr =
|
||||||
|
VirtualMemory::memory_alloc(LoadAddress, m->aligned_base_size + TrampolineSize,
|
||||||
VirtualMemory::MemoryMode::ExecuteReadWrite);
|
VirtualMemory::MemoryMode::ExecuteReadWrite);
|
||||||
|
|
||||||
LoadAddress += CODE_BASE_INCR * (1 + m->aligned_base_size / CODE_BASE_INCR);
|
LoadAddress += CODE_BASE_INCR * (1 + m->aligned_base_size / CODE_BASE_INCR);
|
||||||
|
|
||||||
LOG_INFO(Core_Linker, "====Load Module to Memory ========");
|
void* trampoline_addr = reinterpret_cast<void*>(m->base_virtual_addr + m->aligned_base_size);
|
||||||
|
Xbyak::CodeGenerator c(TrampolineSize, trampoline_addr);
|
||||||
|
|
||||||
|
LOG_INFO(Core_Linker, "======== Load Module to Memory ========");
|
||||||
LOG_INFO(Core_Linker, "base_virtual_addr ......: {:#018x}", m->base_virtual_addr);
|
LOG_INFO(Core_Linker, "base_virtual_addr ......: {:#018x}", m->base_virtual_addr);
|
||||||
LOG_INFO(Core_Linker, "base_size ..............: {:#018x}", base_size);
|
LOG_INFO(Core_Linker, "base_size ..............: {:#018x}", base_size);
|
||||||
LOG_INFO(Core_Linker, "aligned_base_size ......: {:#018x}", m->aligned_base_size);
|
LOG_INFO(Core_Linker, "aligned_base_size ......: {:#018x}", m->aligned_base_size);
|
||||||
|
@ -123,7 +129,7 @@ void Linker::LoadModuleToMemory(Module* m) {
|
||||||
m->elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, segment_file_size);
|
m->elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, segment_file_size);
|
||||||
|
|
||||||
if (elf_pheader[i].p_flags & PF_EXEC) {
|
if (elf_pheader[i].p_flags & PF_EXEC) {
|
||||||
PatchTLS(segment_addr, segment_file_size);
|
PatchTLS(segment_addr, segment_file_size, c);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR(Core_Linker, "p_memsz==0 in type {}",
|
LOG_ERROR(Core_Linker, "p_memsz==0 in type {}",
|
||||||
|
@ -153,8 +159,8 @@ void Linker::LoadModuleToMemory(Module* m) {
|
||||||
case PT_TLS:
|
case PT_TLS:
|
||||||
m->tls.image_virtual_addr = elf_pheader[i].p_vaddr + m->base_virtual_addr;
|
m->tls.image_virtual_addr = elf_pheader[i].p_vaddr + m->base_virtual_addr;
|
||||||
m->tls.image_size = GetAlignedSize(elf_pheader[i]);
|
m->tls.image_size = GetAlignedSize(elf_pheader[i]);
|
||||||
LOG_INFO(Core_Linker, "tls virtual address ={:#x}", m->tls.image_virtual_addr);
|
LOG_INFO(Core_Linker, "TLS virtual address = {:#x}", m->tls.image_virtual_addr);
|
||||||
LOG_INFO(Core_Linker, "tls image size ={}", m->tls.image_size);
|
LOG_INFO(Core_Linker, "TLS image size = {}", m->tls.image_size);
|
||||||
break;
|
break;
|
||||||
case PT_SCE_PROCPARAM:
|
case PT_SCE_PROCPARAM:
|
||||||
m->proc_param_virtual_addr = elf_pheader[i].p_vaddr + m->base_virtual_addr;
|
m->proc_param_virtual_addr = elf_pheader[i].p_vaddr + m->base_virtual_addr;
|
||||||
|
@ -662,7 +668,7 @@ static void RunMainEntry(u64 addr, EntryParams* params, exit_func_t exit_func) {
|
||||||
// there's no coming back
|
// there's no coming back
|
||||||
:
|
:
|
||||||
: "r"(addr), "r"(params), "r"(exit_func)
|
: "r"(addr), "r"(params), "r"(exit_func)
|
||||||
: "rax", "rsi", "rdi", "rsp");
|
: "rax", "rsi", "rdi");
|
||||||
}
|
}
|
||||||
|
|
||||||
void Linker::Execute() {
|
void Linker::Execute() {
|
||||||
|
@ -681,9 +687,13 @@ void Linker::Execute() {
|
||||||
p.argv[0] = "eboot.bin"; // hmm should be ok?
|
p.argv[0] = "eboot.bin"; // hmm should be ok?
|
||||||
|
|
||||||
for (auto& m : m_modules) {
|
for (auto& m : m_modules) {
|
||||||
if (!m->elf.IsSharedLib()) {
|
if (m->elf.IsSharedLib()) {
|
||||||
RunMainEntry(m->elf.GetElfEntry() + m->base_virtual_addr, &p, ProgramExitFunc);
|
continue;
|
||||||
}
|
}
|
||||||
|
if (m->tls.image_virtual_addr != 0) {
|
||||||
|
SetTLSStorage(m->tls.image_virtual_addr);
|
||||||
|
}
|
||||||
|
RunMainEntry(m->elf.GetElfEntry() + m->base_virtual_addr, &p, ProgramExitFunc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
194
src/core/tls.cpp
194
src/core/tls.cpp
|
@ -1,6 +1,7 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "core/tls.h"
|
#include "core/tls.h"
|
||||||
|
@ -11,25 +12,18 @@
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
thread_local u8 TLS[1024];
|
|
||||||
|
|
||||||
struct TLSPattern {
|
struct TLSPattern {
|
||||||
uint8_t pattern[5];
|
u8 pattern[5];
|
||||||
uint8_t pattern_size;
|
u8 pattern_size;
|
||||||
uint8_t imm_size;
|
u8 imm_size;
|
||||||
uint8_t target_reg;
|
u8 target_reg;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr static TLSPattern TlsPatterns[] = {
|
constexpr static TLSPattern TlsPatterns[] = {
|
||||||
{{0x64, 0x48, 0xA1},
|
// 64 48 A1 | 00 00 00 00 00 00 00 00 # mov rax, qword ptr fs:[64b imm]
|
||||||
3,
|
{{0x64, 0x48, 0xA1}, 3, 8, 0},
|
||||||
8,
|
// 64 48 8B 04 25 | 00 00 00 00 # mov rax,qword ptr fs:[0]
|
||||||
0}, // 64 48 A1 | 00 00 00 00 00 00 00 00 # mov rax, qword ptr fs:[64b imm]
|
{{0x64, 0x48, 0x8B, 0x4, 0x25}, 5, 4, 0}, // rax
|
||||||
|
|
||||||
{{0x64, 0x48, 0x8B, 0x4, 0x25},
|
|
||||||
5,
|
|
||||||
4,
|
|
||||||
0}, // 64 48 8B 04 25 | 00 00 00 00 # mov rax,qword ptr fs:[0]
|
|
||||||
{{0x64, 0x48, 0x8B, 0xC, 0x25}, 5, 4, 1}, // rcx
|
{{0x64, 0x48, 0x8B, 0xC, 0x25}, 5, 4, 1}, // rcx
|
||||||
{{0x64, 0x48, 0x8B, 0x14, 0x25}, 5, 4, 2}, // rdx
|
{{0x64, 0x48, 0x8B, 0x14, 0x25}, 5, 4, 2}, // rdx
|
||||||
{{0x64, 0x48, 0x8B, 0x1C, 0x25}, 5, 4, 3}, // rbx
|
{{0x64, 0x48, 0x8B, 0x1C, 0x25}, 5, 4, 3}, // rbx
|
||||||
|
@ -47,103 +41,28 @@ constexpr static TLSPattern TlsPatterns[] = {
|
||||||
{{0x64, 0x4C, 0x8B, 0x3C, 0x25}, 5, 4, 15}, // r15
|
{{0x64, 0x4C, 0x8B, 0x3C, 0x25}, 5, 4, 15}, // r15
|
||||||
};
|
};
|
||||||
|
|
||||||
uintptr_t GetGuestTls(s64 tls_offset) {
|
#ifdef _WIN32
|
||||||
if (tls_offset == 0) {
|
static DWORD slot = 0;
|
||||||
return reinterpret_cast<uintptr_t>(TLS);
|
|
||||||
}
|
void SetTLSStorage(u64 image_address) {
|
||||||
UNREACHABLE_MSG("Unimplemented offset info tls");
|
// Guest apps will use both positive and negative offsets to the TLS pointer.
|
||||||
|
// User data is probably at negative offsets, while pthread data is at positive offsets.
|
||||||
|
const BOOL result = TlsSetValue(slot, reinterpret_cast<LPVOID>(image_address));
|
||||||
|
ASSERT(result != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _WIN64
|
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
|
||||||
static LONG WINAPI ExceptionHandler(PEXCEPTION_POINTERS pExp) noexcept {
|
using namespace Xbyak::util;
|
||||||
auto orig_rip = pExp->ContextRecord->Rip;
|
|
||||||
while (*(u8*)pExp->ContextRecord->Rip == 0x66) {
|
|
||||||
pExp->ContextRecord->Rip++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*(u8*)pExp->ContextRecord->Rip == 0xcd) {
|
|
||||||
int reg = *(u8*)(pExp->ContextRecord->Rip + 1) - 0x80;
|
|
||||||
int sizes = *(u8*)(pExp->ContextRecord->Rip + 2);
|
|
||||||
int pattern_size = sizes & 0xF;
|
|
||||||
int imm_size = sizes >> 4;
|
|
||||||
|
|
||||||
int64_t tls_offset;
|
|
||||||
if (imm_size == 4) {
|
|
||||||
tls_offset = *(s32*)(pExp->ContextRecord->Rip + pattern_size);
|
|
||||||
} else {
|
|
||||||
tls_offset = *(s64*)(pExp->ContextRecord->Rip + pattern_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
(&pExp->ContextRecord->Rax)[reg] = GetGuestTls(tls_offset); /* GetGuestTls */
|
|
||||||
pExp->ContextRecord->Rip += pattern_size + imm_size;
|
|
||||||
|
|
||||||
return EXCEPTION_CONTINUE_EXECUTION;
|
|
||||||
}
|
|
||||||
|
|
||||||
pExp->ContextRecord->Rip = orig_rip;
|
|
||||||
const u32 ec = pExp->ExceptionRecord->ExceptionCode;
|
|
||||||
switch (ec) {
|
|
||||||
case EXCEPTION_ACCESS_VIOLATION: {
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_ACCESS_VIOLATION ({:#x})", ec);
|
|
||||||
const auto info = pExp->ExceptionRecord->ExceptionInformation;
|
|
||||||
switch (info[0]) {
|
|
||||||
case 0:
|
|
||||||
LOG_CRITICAL(Core, "Read violation at address {:#x}", info[1]);
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
LOG_CRITICAL(Core, "Write violation at address {:#x}", info[1]);
|
|
||||||
break;
|
|
||||||
case 8:
|
|
||||||
LOG_CRITICAL(Core, "DEP violation at address {:#x}", info[1]);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
case EXCEPTION_DATATYPE_MISALIGNMENT:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_DATATYPE_MISALIGNMENT ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
case EXCEPTION_FLT_DIVIDE_BY_ZERO:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
case EXCEPTION_ILLEGAL_INSTRUCTION:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_ILLEGAL_INSTRUCTION ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
case EXCEPTION_IN_PAGE_ERROR:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_IN_PAGE_ERROR ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
case EXCEPTION_INT_DIVIDE_BY_ZERO:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
case EXCEPTION_PRIV_INSTRUCTION:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_PRIV_INSTRUCTION ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
case EXCEPTION_STACK_OVERFLOW:
|
|
||||||
LOG_CRITICAL(Core, "Exception EXCEPTION_STACK_OVERFLOW ({:#x})", ec);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return EXCEPTION_CONTINUE_SEARCH;
|
|
||||||
}
|
|
||||||
return EXCEPTION_CONTINUE_SEARCH;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void InstallTlsHandler() {
|
|
||||||
#ifdef _WIN64
|
|
||||||
if (!AddVectoredExceptionHandler(0, ExceptionHandler)) {
|
|
||||||
LOG_CRITICAL(Core, "Failed to register an exception handler");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void PatchTLS(u64 segment_addr, u64 segment_size) {
|
|
||||||
u8* code = reinterpret_cast<u8*>(segment_addr);
|
u8* code = reinterpret_cast<u8*>(segment_addr);
|
||||||
auto remaining_size = segment_size;
|
auto remaining_size = segment_size;
|
||||||
|
|
||||||
|
// Sometimes loads from the FS segment are prefixed with useless operand size prefix bytes like:
|
||||||
|
// |66 66 66| 64 48 8b 04 25 00 00 00 00 # mov rax, qword ptr fs:[0x0]
|
||||||
|
// These are probably ignored by the processor but when patching the instruction to a jump
|
||||||
|
// they cause issues. So look for them and patch them to nop to avoid problems.
|
||||||
|
static constexpr std::array<u8, 3> BadPrefix = {0x66, 0x66, 0x66};
|
||||||
|
|
||||||
while (remaining_size) {
|
while (remaining_size) {
|
||||||
for (const auto& tls_pattern : TlsPatterns) {
|
for (const auto& tls_pattern : TlsPatterns) {
|
||||||
const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size;
|
const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size;
|
||||||
|
@ -153,18 +72,55 @@ void PatchTLS(u64 segment_addr, u64 segment_size) {
|
||||||
if (std::memcmp(code, tls_pattern.pattern, tls_pattern.pattern_size) != 0) {
|
if (std::memcmp(code, tls_pattern.pattern, tls_pattern.pattern_size) != 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
u64 offset = 0;
|
||||||
if (tls_pattern.imm_size == 4) {
|
if (tls_pattern.imm_size == 4) {
|
||||||
|
std::memcpy(&offset, code + tls_pattern.pattern_size, sizeof(u32));
|
||||||
LOG_INFO(Core_Linker, "PATTERN32 FOUND at {}, reg: {} offset: {:#x}",
|
LOG_INFO(Core_Linker, "PATTERN32 FOUND at {}, reg: {} offset: {:#x}",
|
||||||
fmt::ptr(code), tls_pattern.target_reg,
|
fmt::ptr(code), tls_pattern.target_reg, offset);
|
||||||
*(u32*)(code + tls_pattern.pattern_size));
|
|
||||||
} else {
|
} else {
|
||||||
|
std::memcpy(&offset, code + tls_pattern.pattern_size, sizeof(u64));
|
||||||
LOG_INFO(Core_Linker, "PATTERN64 FOUND at {}, reg: {} offset: {:#x}",
|
LOG_INFO(Core_Linker, "PATTERN64 FOUND at {}, reg: {} offset: {:#x}",
|
||||||
fmt::ptr(code), tls_pattern.target_reg,
|
fmt::ptr(code), tls_pattern.target_reg, offset);
|
||||||
*(u32*)(code + tls_pattern.pattern_size));
|
|
||||||
}
|
}
|
||||||
code[0] = 0xcd;
|
ASSERT(offset == 0);
|
||||||
code[1] = 0x80 + tls_pattern.target_reg;
|
|
||||||
code[2] = tls_pattern.pattern_size | (tls_pattern.imm_size << 4);
|
// Allocate slot in the process if not done already.
|
||||||
|
if (slot == 0) {
|
||||||
|
slot = TlsAlloc();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace bogus instruction prefix with nops if it exists.
|
||||||
|
if (std::memcmp(code - BadPrefix.size(), BadPrefix.data(), sizeof(BadPrefix)) == 0) {
|
||||||
|
auto patch = Xbyak::CodeGenerator(BadPrefix.size(), code - BadPrefix.size());
|
||||||
|
patch.nop(BadPrefix.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace mov instruction with near jump to the trampoline.
|
||||||
|
static constexpr u32 NearJmpSize = 5;
|
||||||
|
auto patch = Xbyak::CodeGenerator(total_size, code);
|
||||||
|
patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR);
|
||||||
|
patch.nop(total_size - NearJmpSize);
|
||||||
|
|
||||||
|
// Write the trampoline.
|
||||||
|
// The following logic is based on the wine implementation of TlsGetValue
|
||||||
|
// https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719
|
||||||
|
static constexpr u32 TlsSlotsOffset = 0x1480;
|
||||||
|
static constexpr u32 TlsExpansionSlotsOffset = 0x1780;
|
||||||
|
static constexpr u32 TlsMinimumAvailable = 64;
|
||||||
|
const u32 teb_offset =
|
||||||
|
slot < TlsMinimumAvailable ? TlsSlotsOffset : TlsExpansionSlotsOffset;
|
||||||
|
const u32 tls_index = slot < TlsMinimumAvailable ? slot : slot - TlsMinimumAvailable;
|
||||||
|
|
||||||
|
const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg);
|
||||||
|
c.mov(target_reg, teb_offset);
|
||||||
|
c.putSeg(gs);
|
||||||
|
c.mov(target_reg, ptr[target_reg]); // Load the pointer to the table of tls slots.
|
||||||
|
c.mov(
|
||||||
|
target_reg,
|
||||||
|
qword[target_reg + tls_index * sizeof(LPVOID)]); // Load the pointer to our buffer.
|
||||||
|
c.jmp(code + total_size); // Return to the instruction right after the mov.
|
||||||
|
|
||||||
|
// Move ahead in module.
|
||||||
code += total_size - 1;
|
code += total_size - 1;
|
||||||
remaining_size -= total_size - 1;
|
remaining_size -= total_size - 1;
|
||||||
break;
|
break;
|
||||||
|
@ -174,4 +130,16 @@ void PatchTLS(u64 segment_addr, u64 segment_size) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
void SetTLSStorage(u64 image_address) {
|
||||||
|
UNREACHABLE_MSG("Thread local storage is unimplemented on posix platforms!");
|
||||||
|
}
|
||||||
|
|
||||||
|
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
|
||||||
|
UNREACHABLE_MSG("Thread local storage is unimplemented on posix platforms!");
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Core
|
} // namespace Core
|
||||||
|
|
|
@ -5,12 +5,16 @@
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace Xbyak {
|
||||||
|
class CodeGenerator;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
/// Installs a host exception handler to handle guest TLS access.
|
/// Sets the data pointer that contains the TLS image.
|
||||||
void InstallTlsHandler();
|
void SetTLSStorage(u64 image_address);
|
||||||
|
|
||||||
/// Patches any instructions that access TLS to trigger the exception handler.
|
/// Patches any instructions that access guest TLS to use provided storage.
|
||||||
void PatchTLS(u64 segment_addr, u64 segment_size);
|
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);
|
||||||
|
|
||||||
} // namespace Core
|
} // namespace Core
|
||||||
|
|
|
@ -50,7 +50,6 @@ int main(int argc, char* argv[]) {
|
||||||
|
|
||||||
auto linker = Common::Singleton<Core::Linker>::Instance();
|
auto linker = Common::Singleton<Core::Linker>::Instance();
|
||||||
Libraries::InitHLELibs(&linker->getHLESymbols());
|
Libraries::InitHLELibs(&linker->getHLESymbols());
|
||||||
Core::InstallTlsHandler();
|
|
||||||
linker->LoadModule(path);
|
linker->LoadModule(path);
|
||||||
|
|
||||||
// Check if there is a libc.prx in sce_module folder
|
// Check if there is a libc.prx in sce_module folder
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "core/libraries/videoout/buffer.h"
|
#include "core/libraries/videoout/buffer.h"
|
||||||
|
#include "core/virtual_memory.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/texture_cache/texture_cache.h"
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
#include "video_core/texture_cache/tile_manager.h"
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
@ -15,7 +17,7 @@
|
||||||
#define PAGE_NOACCESS PROT_NONE
|
#define PAGE_NOACCESS PROT_NONE
|
||||||
#define PAGE_READWRITE (PROT_READ | PROT_WRITE)
|
#define PAGE_READWRITE (PROT_READ | PROT_WRITE)
|
||||||
#else
|
#else
|
||||||
#include <Windows.h>
|
#include <windows.h>
|
||||||
|
|
||||||
void mprotect(void* addr, size_t len, int prot) {
|
void mprotect(void* addr, size_t len, int prot) {
|
||||||
DWORD old_prot{};
|
DWORD old_prot{};
|
||||||
|
@ -57,6 +59,7 @@ LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static constexpr u64 StreamBufferSize = 128_MB;
|
static constexpr u64 StreamBufferSize = 128_MB;
|
||||||
|
static constexpr u64 PageShift = 12;
|
||||||
|
|
||||||
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
|
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
|
||||||
: instance{instance_}, scheduler{scheduler_},
|
: instance{instance_}, scheduler{scheduler_},
|
||||||
|
@ -76,7 +79,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
||||||
guest_access_fault.sa_mask = signal_mask;
|
guest_access_fault.sa_mask = signal_mask;
|
||||||
sigaction(SIGSEGV, &guest_access_fault, nullptr);
|
sigaction(SIGSEGV, &guest_access_fault, nullptr);
|
||||||
#else
|
#else
|
||||||
veh_handle = AddVectoredExceptionHandler(1, GuestFaultSignalHandler);
|
veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
|
||||||
ASSERT_MSG(veh_handle, "Failed to register an exception handler");
|
ASSERT_MSG(veh_handle, "Failed to register an exception handler");
|
||||||
#endif
|
#endif
|
||||||
g_texture_cache = this;
|
g_texture_cache = this;
|
||||||
|
@ -243,8 +246,8 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
||||||
const u64 num_pages = ((addr + size - 1) >> PageBits) - (addr >> PageBits) + 1;
|
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
|
||||||
const u64 page_start = addr >> PageBits;
|
const u64 page_start = addr >> PageShift;
|
||||||
const u64 page_end = page_start + num_pages;
|
const u64 page_end = page_start + num_pages;
|
||||||
|
|
||||||
const auto pages_interval =
|
const auto pages_interval =
|
||||||
|
@ -256,8 +259,8 @@ void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
||||||
const auto& range = cached_pages.equal_range(pages_interval);
|
const auto& range = cached_pages.equal_range(pages_interval);
|
||||||
for (const auto& [range, count] : boost::make_iterator_range(range)) {
|
for (const auto& [range, count] : boost::make_iterator_range(range)) {
|
||||||
const auto interval = range & pages_interval;
|
const auto interval = range & pages_interval;
|
||||||
const VAddr interval_start_addr = boost::icl::first(interval) << PageBits;
|
const VAddr interval_start_addr = boost::icl::first(interval) << PageShift;
|
||||||
const VAddr interval_end_addr = boost::icl::last_next(interval) << PageBits;
|
const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift;
|
||||||
const u32 interval_size = interval_end_addr - interval_start_addr;
|
const u32 interval_size = interval_end_addr - interval_start_addr;
|
||||||
void* addr = reinterpret_cast<void*>(interval_start_addr);
|
void* addr = reinterpret_cast<void*>(interval_start_addr);
|
||||||
if (delta > 0 && count == delta) {
|
if (delta > 0 && count == delta) {
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <forward_list>
|
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <boost/icl/interval_map.hpp>
|
#include <boost/icl/interval_map.hpp>
|
||||||
#include <tsl/robin_map.h>
|
#include <tsl/robin_map.h>
|
||||||
|
@ -19,7 +18,12 @@ struct BufferAttributeGroup;
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
class TextureCache {
|
class TextureCache {
|
||||||
static constexpr u64 PageBits = 14;
|
// This is the page shift for adding images into the hash map. It isn't related to
|
||||||
|
// the page size of the guest or the host and is chosen for convenience. A number too
|
||||||
|
// small will increase the number of hash map lookups per image, while too large will
|
||||||
|
// increase the number of images per page.
|
||||||
|
static constexpr u64 PageBits = 20;
|
||||||
|
static constexpr u64 PageMask = (1ULL << PageBits) - 1;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
||||||
|
|
Loading…
Reference in New Issue