Add patches for F16C instructions under Rosetta 2.
This commit is contained in:
parent
905d49fd96
commit
a17150960f
|
@ -82,3 +82,6 @@
|
||||||
path = externals/ffmpeg-core
|
path = externals/ffmpeg-core
|
||||||
url = https://github.com/shadps4-emu/ext-ffmpeg-core.git
|
url = https://github.com/shadps4-emu/ext-ffmpeg-core.git
|
||||||
shallow = true
|
shallow = true
|
||||||
|
[submodule "externals/half"]
|
||||||
|
path = externals/half
|
||||||
|
url = https://github.com/ROCm/half.git
|
||||||
|
|
|
@ -638,6 +638,9 @@ if (APPLE)
|
||||||
|
|
||||||
# Replacement for std::chrono::time_zone
|
# Replacement for std::chrono::time_zone
|
||||||
target_link_libraries(shadps4 PRIVATE date::date-tz)
|
target_link_libraries(shadps4 PRIVATE date::date-tz)
|
||||||
|
|
||||||
|
# Half float conversions for F16C patches
|
||||||
|
target_link_libraries(shadps4 PRIVATE half)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT ENABLE_QT_GUI)
|
if (NOT ENABLE_QT_GUI)
|
||||||
|
|
|
@ -142,12 +142,18 @@ if (WIN32)
|
||||||
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
|
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (APPLE)
|
||||||
|
# half
|
||||||
|
add_library(half INTERFACE)
|
||||||
|
target_include_directories(half INTERFACE half/include)
|
||||||
|
|
||||||
# date
|
# date
|
||||||
if (APPLE AND NOT TARGET date::date-tz)
|
if (NOT TARGET date::date-tz)
|
||||||
option(BUILD_TZ_LIB "" ON)
|
option(BUILD_TZ_LIB "" ON)
|
||||||
option(USE_SYSTEM_TZ_DB "" ON)
|
option(USE_SYSTEM_TZ_DB "" ON)
|
||||||
add_subdirectory(date)
|
add_subdirectory(date)
|
||||||
endif()
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
# Tracy
|
# Tracy
|
||||||
option(TRACY_ENABLE "" ON)
|
option(TRACY_ENABLE "" ON)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 1ddada225144cac0de8f6b5c0dd9acffd99a2e68
|
|
@ -15,6 +15,7 @@
|
||||||
#else
|
#else
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
|
#include <half.hpp>
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -30,6 +31,12 @@ static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) {
|
||||||
if (reg >= ZYDIS_REGISTER_RAX && reg <= ZYDIS_REGISTER_R15) {
|
if (reg >= ZYDIS_REGISTER_RAX && reg <= ZYDIS_REGISTER_R15) {
|
||||||
return Xbyak::Reg64(reg - ZYDIS_REGISTER_RAX + Xbyak::Operand::RAX);
|
return Xbyak::Reg64(reg - ZYDIS_REGISTER_RAX + Xbyak::Operand::RAX);
|
||||||
}
|
}
|
||||||
|
if (reg >= ZYDIS_REGISTER_XMM0 && reg <= ZYDIS_REGISTER_XMM31) {
|
||||||
|
return Xbyak::Xmm(reg - ZYDIS_REGISTER_XMM0 + xmm0.getIdx());
|
||||||
|
}
|
||||||
|
if (reg >= ZYDIS_REGISTER_YMM0 && reg <= ZYDIS_REGISTER_YMM31) {
|
||||||
|
return Xbyak::Ymm(reg - ZYDIS_REGISTER_YMM0 + ymm0.getIdx());
|
||||||
|
}
|
||||||
UNREACHABLE_MSG("Unsupported register: {}", static_cast<u32>(reg));
|
UNREACHABLE_MSG("Unsupported register: {}", static_cast<u32>(reg));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,6 +73,12 @@ static Xbyak::Address ZydisToXbyakMemoryOperand(const ZydisDecodedOperand& opera
|
||||||
return ptr[expression];
|
return ptr[expression];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads the raw unsigned value out of a decoded Zydis immediate operand.
/// Asserts if the operand is not of immediate type.
static u64 ZydisToXbyakImmediateOperand(const ZydisDecodedOperand& operand) {
    ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_IMMEDIATE,
               "Expected immediate operand, got type: {}", static_cast<u32>(operand.type));

    const auto& immediate = operand.imm;
    return immediate.value.u;
}
|
||||||
|
|
||||||
static std::unique_ptr<Xbyak::Operand> ZydisToXbyakOperand(const ZydisDecodedOperand& operand) {
|
static std::unique_ptr<Xbyak::Operand> ZydisToXbyakOperand(const ZydisDecodedOperand& operand) {
|
||||||
switch (operand.type) {
|
switch (operand.type) {
|
||||||
case ZYDIS_OPERAND_TYPE_REGISTER: {
|
case ZYDIS_OPERAND_TYPE_REGISTER: {
|
||||||
|
@ -110,51 +123,135 @@ static Xbyak::Reg AllocateScratchRegister(
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
|
|
||||||
static constexpr u32 MaxSavedRegisters = 3;
|
static pthread_key_t stack_pointer_slot;
|
||||||
static pthread_key_t register_save_slots[MaxSavedRegisters];
|
static pthread_key_t patch_stack_slot;
|
||||||
static std::once_flag register_save_init_flag;
|
static std::once_flag patch_context_slots_init_flag;
|
||||||
|
|
||||||
static_assert(sizeof(void*) == sizeof(u64),
|
static_assert(sizeof(void*) == sizeof(u64),
|
||||||
"Cannot fit a register inside a thread local storage slot.");
|
"Cannot fit a register inside a thread local storage slot.");
|
||||||
|
|
||||||
static void InitializeRegisterSaveSlots() {
|
static void InitializePatchContextSlots() {
|
||||||
for (u32 i = 0; i < MaxSavedRegisters; i++) {
|
ASSERT_MSG(pthread_key_create(&stack_pointer_slot, nullptr) == 0,
|
||||||
ASSERT_MSG(pthread_key_create(®ister_save_slots[i], nullptr) == 0,
|
"Unable to allocate thread-local register for stack pointer.");
|
||||||
"Unable to allocate thread-local register save slot {}", i);
|
ASSERT_MSG(pthread_key_create(&patch_stack_slot, nullptr) == 0,
|
||||||
|
"Unable to allocate thread-local register for patch stack.");
|
||||||
|
}
|
||||||
|
|
||||||
|
void InitializeThreadPatchStack() {
|
||||||
|
std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots);
|
||||||
|
|
||||||
|
const auto* patch_stack = std::malloc(0x1000);
|
||||||
|
pthread_setspecific(patch_stack_slot, patch_stack);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CleanupThreadPatchStack() {
|
||||||
|
std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots);
|
||||||
|
|
||||||
|
auto* patch_stack = pthread_getspecific(patch_stack_slot);
|
||||||
|
if (patch_stack != nullptr) {
|
||||||
|
std::free(patch_stack);
|
||||||
|
pthread_setspecific(patch_stack_slot, nullptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) {
|
/// Saves the stack pointer to thread local storage and loads the patch stack.
|
||||||
ASSERT_MSG(regs.size() <= MaxSavedRegisters, "Not enough space to save {} registers.",
|
static void SaveStack(Xbyak::CodeGenerator& c) {
|
||||||
regs.size());
|
std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots);
|
||||||
|
|
||||||
std::call_once(register_save_init_flag, &InitializeRegisterSaveSlots);
|
|
||||||
|
|
||||||
u32 index = 0;
|
|
||||||
for (const auto& reg : regs) {
|
|
||||||
const auto offset = reinterpret_cast<void*>(register_save_slots[index++] * sizeof(void*));
|
|
||||||
|
|
||||||
|
// Save stack pointer and load patch stack.
|
||||||
c.putSeg(gs);
|
c.putSeg(gs);
|
||||||
c.mov(qword[offset], reg.cvt64());
|
c.mov(qword[reinterpret_cast<void*>(stack_pointer_slot * sizeof(void*))], rsp);
|
||||||
}
|
c.putSeg(gs);
|
||||||
|
c.mov(rsp, qword[reinterpret_cast<void*>(patch_stack_slot * sizeof(void*))]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Restores the stack pointer from thread local storage.
|
||||||
|
static void RestoreStack(Xbyak::CodeGenerator& c) {
|
||||||
|
std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots);
|
||||||
|
|
||||||
|
// Save patch stack pointer and load original stack.
|
||||||
|
c.putSeg(gs);
|
||||||
|
c.mov(qword[reinterpret_cast<void*>(patch_stack_slot * sizeof(void*))], rsp);
|
||||||
|
c.putSeg(gs);
|
||||||
|
c.mov(rsp, qword[reinterpret_cast<void*>(stack_pointer_slot * sizeof(void*))]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// These utilities are not implemented as we can't save anything to thread local storage without
|
||||||
|
// temporary registers.
|
||||||
|
void InitializeThreadPatchStack() {
|
||||||
|
// No-op
|
||||||
|
}
|
||||||
|
|
||||||
|
void CleanupThreadPatchStack() {
|
||||||
|
// No-op
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Saves the stack pointer to thread local storage and loads the patch stack.
|
||||||
|
static void SaveStack(Xbyak::CodeGenerator& c) {
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Restores the stack pointer from thread local storage.
|
||||||
|
static void RestoreStack(Xbyak::CodeGenerator& c) {
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Switches to the patch stack, saves registers, and restores the original stack.
|
||||||
|
static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) {
|
||||||
|
SaveStack(c);
|
||||||
|
for (const auto& reg : regs) {
|
||||||
|
c.push(reg.cvt64());
|
||||||
|
}
|
||||||
|
RestoreStack(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Switches to the patch stack, restores registers, and restores the original stack.
|
||||||
static void RestoreRegisters(Xbyak::CodeGenerator& c,
|
static void RestoreRegisters(Xbyak::CodeGenerator& c,
|
||||||
const std::initializer_list<Xbyak::Reg> regs) {
|
const std::initializer_list<Xbyak::Reg> regs) {
|
||||||
ASSERT_MSG(regs.size() <= MaxSavedRegisters, "Not enough space to restore {} registers.",
|
SaveStack(c);
|
||||||
regs.size());
|
|
||||||
|
|
||||||
std::call_once(register_save_init_flag, &InitializeRegisterSaveSlots);
|
|
||||||
|
|
||||||
u32 index = 0;
|
|
||||||
for (const auto& reg : regs) {
|
for (const auto& reg : regs) {
|
||||||
const auto offset = reinterpret_cast<void*>(register_save_slots[index++] * sizeof(void*));
|
c.pop(reg.cvt64());
|
||||||
|
}
|
||||||
|
RestoreStack(c);
|
||||||
|
}
|
||||||
|
|
||||||
c.putSeg(gs);
|
/// Switches to the patch stack and stores all registers.
|
||||||
c.mov(reg.cvt64(), qword[offset]);
|
static void SaveContext(Xbyak::CodeGenerator& c) {
|
||||||
|
SaveStack(c);
|
||||||
|
for (int reg = Xbyak::Operand::RAX; reg <= Xbyak::Operand::R15; reg++) {
|
||||||
|
c.push(Xbyak::Reg64(reg));
|
||||||
|
}
|
||||||
|
for (int reg = 0; reg <= 7; reg++) {
|
||||||
|
c.sub(rsp, 32);
|
||||||
|
c.vmovdqu(ptr[rsp], Xbyak::Ymm(reg));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Restores all registers and restores the original stack.
|
||||||
|
/// If the destination is a register, it is not restored to preserve the output.
|
||||||
|
static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst) {
|
||||||
|
for (int reg = 7; reg >= 0; reg--) {
|
||||||
|
if ((!dst.isXMM() && !dst.isYMM()) || dst.getIdx() != reg) {
|
||||||
|
c.vmovdqu(Xbyak::Ymm(reg), ptr[rsp]);
|
||||||
|
}
|
||||||
|
c.add(rsp, 32);
|
||||||
|
}
|
||||||
|
for (int reg = Xbyak::Operand::R15; reg >= Xbyak::Operand::RAX; reg--) {
|
||||||
|
if (!dst.isREG() || dst.getIdx() != reg) {
|
||||||
|
c.pop(Xbyak::Reg64(reg));
|
||||||
|
} else {
|
||||||
|
c.add(rsp, 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RestoreStack(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
|
||||||
static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||||
const auto src1 = ZydisToXbyakRegisterOperand(operands[1]);
|
const auto src1 = ZydisToXbyakRegisterOperand(operands[1]);
|
||||||
|
@ -204,9 +301,9 @@ static void GenerateBEXTR(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
|
||||||
c.and_(dst, scratch2);
|
c.and_(dst, scratch2);
|
||||||
|
|
||||||
if (dst.getIdx() == shift.getIdx()) {
|
if (dst.getIdx() == shift.getIdx()) {
|
||||||
RestoreRegisters(c, {scratch1, scratch2});
|
RestoreRegisters(c, {scratch2, scratch1});
|
||||||
} else {
|
} else {
|
||||||
RestoreRegisters(c, {scratch1, scratch2, shift});
|
RestoreRegisters(c, {shift, scratch2, scratch1});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -258,10 +355,138 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
|
||||||
RestoreRegisters(c, {scratch});
|
RestoreRegisters(c, {scratch});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FilterRosetta2Only(const ZydisDecodedOperand*) {
|
/// Converts `count` half-precision values read from `in` to single precision, writing them to
/// `out`. Called from generated code, hence the explicit SysV calling convention.
static __attribute__((sysv_abi)) void PerformVCVTPH2PS(float* out, const half_float::half* in,
                                                       const u32 count) {
    const half_float::half* const end = in + count;
    for (const half_float::half* cursor = in; cursor != end; ++cursor, ++out) {
        *out = half_float::half_cast<float>(*cursor);
    }
}
|
||||||
|
|
||||||
|
static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||||
|
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||||
|
const auto src = ZydisToXbyakOperand(operands[1]);
|
||||||
|
|
||||||
|
const auto float_count = dst.getBit() / 32;
|
||||||
|
const auto byte_count = float_count * 4;
|
||||||
|
|
||||||
|
SaveContext(c);
|
||||||
|
|
||||||
|
// Allocate stack space for outputs and load into first parameter.
|
||||||
|
c.sub(rsp, byte_count);
|
||||||
|
c.mov(rdi, rsp);
|
||||||
|
|
||||||
|
if (src->isXMM()) {
|
||||||
|
// Allocate stack space for inputs and load into second parameter.
|
||||||
|
c.sub(rsp, byte_count);
|
||||||
|
c.mov(rsi, rsp);
|
||||||
|
|
||||||
|
// Move input to the allocated space.
|
||||||
|
c.movdqu(ptr[rsp], *reinterpret_cast<Xbyak::Xmm*>(src.get()));
|
||||||
|
} else {
|
||||||
|
c.lea(rsi, src->getAddress());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load float count into third parameter.
|
||||||
|
c.mov(rdx, float_count);
|
||||||
|
|
||||||
|
c.mov(rax, reinterpret_cast<u64>(PerformVCVTPH2PS));
|
||||||
|
c.call(rax);
|
||||||
|
|
||||||
|
if (src->isXMM()) {
|
||||||
|
// Clean up after inputs space.
|
||||||
|
c.add(rsp, byte_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load outputs into destination register and clean up space.
|
||||||
|
if (dst.isYMM()) {
|
||||||
|
c.vmovdqu(*reinterpret_cast<const Xbyak::Ymm*>(&dst), ptr[rsp]);
|
||||||
|
} else {
|
||||||
|
c.movdqu(*reinterpret_cast<const Xbyak::Xmm*>(&dst), ptr[rsp]);
|
||||||
|
}
|
||||||
|
c.add(rsp, byte_count);
|
||||||
|
|
||||||
|
RestoreContext(c, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
using SingleToHalfFloatConverter = half_float::half (*)(float);
|
||||||
|
static const SingleToHalfFloatConverter SingleToHalfFloatConverters[4] = {
|
||||||
|
half_float::half_cast<half_float::half, std::round_to_nearest, float>,
|
||||||
|
half_float::half_cast<half_float::half, std::round_toward_neg_infinity, float>,
|
||||||
|
half_float::half_cast<half_float::half, std::round_toward_infinity, float>,
|
||||||
|
half_float::half_cast<half_float::half, std::round_toward_zero, float>,
|
||||||
|
};
|
||||||
|
|
||||||
|
static __attribute__((sysv_abi)) void PerformVCVTPS2PH(half_float::half* out, const float* in,
|
||||||
|
const u32 count, const u8 rounding_mode) {
|
||||||
|
const auto conversion_func = SingleToHalfFloatConverters[rounding_mode];
|
||||||
|
|
||||||
|
for (u32 i = 0; i < count; i++) {
|
||||||
|
out[i] = conversion_func(in[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||||
|
const auto dst = ZydisToXbyakOperand(operands[0]);
|
||||||
|
const auto src = ZydisToXbyakRegisterOperand(operands[1]);
|
||||||
|
const auto ctrl = ZydisToXbyakImmediateOperand(operands[2]);
|
||||||
|
|
||||||
|
const auto float_count = src.getBit() / 32;
|
||||||
|
const auto byte_count = float_count * 4;
|
||||||
|
|
||||||
|
SaveContext(c);
|
||||||
|
|
||||||
|
if (dst->isXMM()) {
|
||||||
|
// Allocate stack space for outputs and load into first parameter.
|
||||||
|
c.sub(rsp, byte_count);
|
||||||
|
c.mov(rdi, rsp);
|
||||||
|
} else {
|
||||||
|
c.lea(rdi, dst->getAddress());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate stack space for inputs and load into second parameter.
|
||||||
|
c.sub(rsp, byte_count);
|
||||||
|
c.mov(rsi, rsp);
|
||||||
|
|
||||||
|
// Move input to the allocated space.
|
||||||
|
if (src.isYMM()) {
|
||||||
|
c.vmovdqu(ptr[rsp], *reinterpret_cast<const Xbyak::Ymm*>(&src));
|
||||||
|
} else {
|
||||||
|
c.movdqu(ptr[rsp], *reinterpret_cast<const Xbyak::Xmm*>(&src));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load float count into third parameter.
|
||||||
|
c.mov(rdx, float_count);
|
||||||
|
|
||||||
|
// Load rounding mode into fourth parameter.
|
||||||
|
if (ctrl & 4) {
|
||||||
|
// Load from MXCSR.RC.
|
||||||
|
c.stmxcsr(ptr[rsp - 4]);
|
||||||
|
c.mov(rcx, ptr[rsp - 4]);
|
||||||
|
c.shr(rcx, 13);
|
||||||
|
c.and_(rcx, 3);
|
||||||
|
} else {
|
||||||
|
c.mov(rcx, ctrl & 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
c.mov(rax, reinterpret_cast<u64>(PerformVCVTPS2PH));
|
||||||
|
c.call(rax);
|
||||||
|
|
||||||
|
// Clean up after inputs space.
|
||||||
|
c.add(rsp, byte_count);
|
||||||
|
|
||||||
|
if (dst->isXMM()) {
|
||||||
|
// Load outputs into destination register and clean up space.
|
||||||
|
c.movdqu(*reinterpret_cast<Xbyak::Xmm*>(dst.get()), ptr[rsp]);
|
||||||
|
c.add(rsp, byte_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
RestoreContext(c, *dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool FilterRosetta2Only(const ZydisDecodedOperand*) {
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
size_t size = sizeof(ret);
|
size_t size = sizeof(ret);
|
||||||
if (sysctlbyname("sysctl.proc_translated", &ret, &size, NULL, 0) != 0) {
|
if (sysctlbyname("sysctl.proc_translated", &ret, &size, nullptr, 0) != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -339,12 +564,16 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
// BMI1 instructions that are not supported by Rosetta 2 on Apple Silicon.
|
// Patches for instruction sets not supported by Rosetta 2.
|
||||||
|
// BMI1
|
||||||
{ZYDIS_MNEMONIC_ANDN, {FilterRosetta2Only, GenerateANDN, true}},
|
{ZYDIS_MNEMONIC_ANDN, {FilterRosetta2Only, GenerateANDN, true}},
|
||||||
{ZYDIS_MNEMONIC_BEXTR, {FilterRosetta2Only, GenerateBEXTR, true}},
|
{ZYDIS_MNEMONIC_BEXTR, {FilterRosetta2Only, GenerateBEXTR, true}},
|
||||||
{ZYDIS_MNEMONIC_BLSI, {FilterRosetta2Only, GenerateBLSI, true}},
|
{ZYDIS_MNEMONIC_BLSI, {FilterRosetta2Only, GenerateBLSI, true}},
|
||||||
{ZYDIS_MNEMONIC_BLSMSK, {FilterRosetta2Only, GenerateBLSMSK, true}},
|
{ZYDIS_MNEMONIC_BLSMSK, {FilterRosetta2Only, GenerateBLSMSK, true}},
|
||||||
{ZYDIS_MNEMONIC_BLSR, {FilterRosetta2Only, GenerateBLSR, true}},
|
{ZYDIS_MNEMONIC_BLSR, {FilterRosetta2Only, GenerateBLSR, true}},
|
||||||
|
// F16C
|
||||||
|
{ZYDIS_MNEMONIC_VCVTPH2PS, {FilterRosetta2Only, GenerateVCVTPH2PS, true}},
|
||||||
|
{ZYDIS_MNEMONIC_VCVTPS2PH, {FilterRosetta2Only, GenerateVCVTPS2PH, true}},
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,12 @@ class CodeGenerator;
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
|
/// Initializes a stack for the current thread for use by patch implementations.
|
||||||
|
void InitializeThreadPatchStack();
|
||||||
|
|
||||||
|
/// Cleans up the patch stack for the current thread.
|
||||||
|
void CleanupThreadPatchStack();
|
||||||
|
|
||||||
/// Patches CPU instructions that cannot run as-is on the host.
|
/// Patches CPU instructions that cannot run as-is on the host.
|
||||||
void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);
|
void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/singleton.h"
|
#include "common/singleton.h"
|
||||||
#include "common/thread.h"
|
#include "common/thread.h"
|
||||||
|
#include "core/cpu_patches.h"
|
||||||
#include "core/libraries/error_codes.h"
|
#include "core/libraries/error_codes.h"
|
||||||
#include "core/libraries/kernel/libkernel.h"
|
#include "core/libraries/kernel/libkernel.h"
|
||||||
#include "core/libraries/kernel/thread_management.h"
|
#include "core/libraries/kernel/thread_management.h"
|
||||||
|
@ -985,6 +986,7 @@ static void cleanup_thread(void* arg) {
|
||||||
destructor(value);
|
destructor(value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Core::CleanupThreadPatchStack();
|
||||||
thread->is_almost_done = true;
|
thread->is_almost_done = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -993,6 +995,7 @@ static void* run_thread(void* arg) {
|
||||||
Common::SetCurrentThreadName(thread->name.c_str());
|
Common::SetCurrentThreadName(thread->name.c_str());
|
||||||
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
||||||
linker->InitTlsForThread(false);
|
linker->InitTlsForThread(false);
|
||||||
|
Core::InitializeThreadPatchStack();
|
||||||
void* ret = nullptr;
|
void* ret = nullptr;
|
||||||
g_pthread_self = thread;
|
g_pthread_self = thread;
|
||||||
pthread_cleanup_push(cleanup_thread, thread);
|
pthread_cleanup_push(cleanup_thread, thread);
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "common/thread.h"
|
#include "common/thread.h"
|
||||||
#include "core/aerolib/aerolib.h"
|
#include "core/aerolib/aerolib.h"
|
||||||
#include "core/aerolib/stubs.h"
|
#include "core/aerolib/stubs.h"
|
||||||
|
#include "core/cpu_patches.h"
|
||||||
#include "core/libraries/kernel/memory_management.h"
|
#include "core/libraries/kernel/memory_management.h"
|
||||||
#include "core/libraries/kernel/thread_management.h"
|
#include "core/libraries/kernel/thread_management.h"
|
||||||
#include "core/linker.h"
|
#include "core/linker.h"
|
||||||
|
@ -86,6 +87,7 @@ void Linker::Execute() {
|
||||||
Common::SetCurrentThreadName("GAME_MainThread");
|
Common::SetCurrentThreadName("GAME_MainThread");
|
||||||
Libraries::Kernel::pthreadInitSelfMainThread();
|
Libraries::Kernel::pthreadInitSelfMainThread();
|
||||||
InitTlsForThread(true);
|
InitTlsForThread(true);
|
||||||
|
InitializeThreadPatchStack();
|
||||||
|
|
||||||
// Start shared library modules
|
// Start shared library modules
|
||||||
for (auto& m : m_modules) {
|
for (auto& m : m_modules) {
|
||||||
|
@ -104,6 +106,8 @@ void Linker::Execute() {
|
||||||
RunMainEntry(m->GetEntryAddress(), &p, ProgramExitFunc);
|
RunMainEntry(m->GetEntryAddress(), &p, ProgramExitFunc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CleanupThreadPatchStack();
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {
|
s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {
|
||||||
|
|
Loading…
Reference in New Issue