diff --git a/.gitmodules b/.gitmodules index 6a73ffa1..3a9d8f42 100644 --- a/.gitmodules +++ b/.gitmodules @@ -58,3 +58,6 @@ [submodule "externals/ext-boost"] path = externals/ext-boost url = https://github.com/shadps4-emu/ext-boost.git +[submodule "externals/date"] + path = externals/date + url = https://github.com/HowardHinnant/date.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fc91042..1c2a7265 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,10 @@ cmake_minimum_required(VERSION 3.16.3) set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED True) +if(APPLE) + enable_language(OBJC) +endif() + if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() @@ -62,6 +66,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") find_package(Boost 1.84.0 CONFIG) find_package(cryptopp 8.9.0 MODULE) +find_package(date 3.0.1 CONFIG) find_package(fmt 10.2.1 CONFIG) find_package(glslang 14.2.0 CONFIG) find_package(magic_enum 0.9.6 CONFIG) @@ -75,6 +80,13 @@ find_package(xxHash 0.8.2 MODULE) find_package(zlib-ng 2.2.0 MODULE) find_package(Zydis 4.1.0 CONFIG) +include(CheckSymbolExists) +check_symbol_exists(pthread_mutex_timedlock "pthread.h" HAVE_PTHREAD_MUTEX_TIMEDLOCK) +# Windows always has the function through winpthreads +if(HAVE_PTHREAD_MUTEX_TIMEDLOCK OR WIN32) + add_compile_options(-DHAVE_PTHREAD_MUTEX_TIMEDLOCK) +endif() + add_subdirectory(externals) include_directories(src) @@ -295,7 +307,9 @@ set(CORE src/core/aerolib/stubs.cpp src/core/file_format/splash.h src/core/file_format/splash.cpp src/core/file_sys/fs.cpp - src/core/file_sys/fs.h + src/core/file_sys/fs.h + src/core/instruction_emulator.cpp + src/core/instruction_emulator.h src/core/loader.cpp src/core/loader.h src/core/loader/dwarf.cpp @@ -539,9 +553,15 @@ endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient) +target_link_libraries(shadps4 
PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient date::date-tz) target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3) +if (APPLE) + # Link MoltenVK for Vulkan support + find_library(MOLTENVK MoltenVK REQUIRED) + target_link_libraries(shadps4 PRIVATE ${MOLTENVK}) +endif() + if (NOT ENABLE_QT_GUI) target_link_libraries(shadps4 PRIVATE SDL3::SDL3) endif() diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index ae9f0856..5e619495 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -134,6 +134,13 @@ if (WIN32) target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") endif() +# date +if (NOT TARGET date::date-tz) + option(BUILD_TZ_LIB "" ON) + option(USE_SYSTEM_TZ_DB "" ON) + add_subdirectory(date) +endif() + # Tracy option(TRACY_ENABLE "" ON) option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treaten as a crash diff --git a/externals/date b/externals/date new file mode 160000 index 00000000..1ead6715 --- /dev/null +++ b/externals/date @@ -0,0 +1 @@ +Subproject commit 1ead6715dec030d340a316c927c877a3c4e5a00c diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 13db7eb5..f08b36fa 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -9,6 +9,7 @@ #include "common/thread.h" #ifdef __APPLE__ #include +#include #elif defined(_WIN32) #include #include "common/string_util.h" diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 03c755e0..59e6367e 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -245,14 +245,20 @@ struct AddressSpace::Impl { Impl() { // Allocate virtual address placeholder for our address space. 
void* hint_address = reinterpret_cast(SYSTEM_MANAGED_MIN); +#ifdef __APPLE__ + constexpr int virtual_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; +#else + constexpr int virtual_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED; +#endif virtual_size = SystemSize + UserSize; virtual_base = reinterpret_cast( - mmap(hint_address, virtual_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, -1, 0)); + mmap(hint_address, virtual_size, PROT_READ | PROT_WRITE, virtual_flags, -1, 0)); if (virtual_base == MAP_FAILED) { LOG_CRITICAL(Kernel_Vmm, "mmap failed: {}", strerror(errno)); throw std::bad_alloc{}; } + +#ifndef __APPLE__ madvise(virtual_base, virtual_size, MADV_HUGEPAGE); backing_fd = memfd_create("BackingDmem", 0); @@ -260,6 +266,15 @@ struct AddressSpace::Impl { LOG_CRITICAL(Kernel_Vmm, "memfd_create failed: {}", strerror(errno)); throw std::bad_alloc{}; } +#else + const auto shm_path = fmt::format("/BackingDmem{}", getpid()); + backing_fd = shm_open(shm_path.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600); + if (backing_fd < 0) { + LOG_CRITICAL(Kernel_Vmm, "shm_open failed: {}", strerror(errno)); + throw std::bad_alloc{}; + } + shm_unlink(shm_path.c_str()); +#endif // Defined to extend the file with zeros int ret = ftruncate(backing_fd, BackingSize); diff --git a/src/core/instruction_emulator.cpp b/src/core/instruction_emulator.cpp new file mode 100644 index 00000000..b0bf4146 --- /dev/null +++ b/src/core/instruction_emulator.cpp @@ -0,0 +1,291 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include "common/assert.h" +#include "common/types.h" +#include "instruction_emulator.h" + +namespace Core { + +static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) { + if (reg >= ZYDIS_REGISTER_EAX && reg <= ZYDIS_REGISTER_R15D) { + return Xbyak::Reg32(reg - ZYDIS_REGISTER_EAX); + } else if (reg >= 
ZYDIS_REGISTER_RAX && reg <= ZYDIS_REGISTER_R15) { + return Xbyak::Reg64(reg - ZYDIS_REGISTER_RAX); + } else { + UNREACHABLE_MSG("Unsupported register: {}", static_cast(reg)); + } +} + +static Xbyak::Reg ZydisToXbyakRegisterOperand(const ZydisDecodedOperand& operand) { + ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_REGISTER, "Expected register operand, got type: {}", static_cast(operand.type)); + + return ZydisToXbyakRegister(operand.reg.value); +} + +static Xbyak::Address ZydisToXbyakMemoryOperand(const ZydisDecodedOperand& operand) { + ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_MEMORY, "Expected memory operand, got type: {}", static_cast(operand.type)); + + Xbyak::RegExp expression{}; + if (operand.mem.base != ZYDIS_REGISTER_NONE) { + expression = expression + ZydisToXbyakRegister(operand.mem.base); + } + if (operand.mem.index != ZYDIS_REGISTER_NONE) { + if (operand.mem.scale != 0) { + expression = expression + ZydisToXbyakRegister(operand.mem.index) * operand.mem.scale; + } else { + expression = expression + ZydisToXbyakRegister(operand.mem.index); + } + } + if (operand.mem.disp.size != 0 && operand.mem.disp.value != 0) { + expression = expression + operand.mem.disp.value; + } + + return Xbyak::util::ptr[expression]; +} + +static std::unique_ptr ZydisToXbyakOperand(const ZydisDecodedOperand& operand) { + switch (operand.type) { + case ZYDIS_OPERAND_TYPE_REGISTER: { + return std::make_unique(ZydisToXbyakRegisterOperand(operand)); + } + case ZYDIS_OPERAND_TYPE_MEMORY: { + return std::make_unique(ZydisToXbyakMemoryOperand(operand)); + } + default: + UNREACHABLE_MSG("Unsupported operand type: {}", static_cast(operand.type)); + } +} + +#ifdef __APPLE__ + +static bool OperandUsesRegister(const Xbyak::Operand* operand, int index) { + if (operand->isREG()) { + return operand->getIdx() == index; + } + if (operand->isMEM()) { + const Xbyak::RegExp& reg_exp = operand->getAddress().getRegExp(); + return reg_exp.getBase().getIdx() == index || reg_exp.getIndex().getIdx() 
== index; + } + UNREACHABLE_MSG("Unsupported operand kind: {}", static_cast(operand->getKind())); +} + +static bool IsRegisterAllocated(const std::initializer_list& allocated_registers, const int index) { + return std::ranges::find_if( + allocated_registers.begin(), allocated_registers.end(), + [index](const Xbyak::Operand* operand) { return OperandUsesRegister(operand, index); }) != allocated_registers.end(); +} + +static Xbyak::Reg AllocateScratchRegister(const std::initializer_list allocated_registers, const u32 bits) { + for (int index = Xbyak::Operand::R8; index <= Xbyak::Operand::R15; index++) { + if (!IsRegisterAllocated(allocated_registers, index)) { + return Xbyak::Reg32e(index, static_cast(bits)); + } + } + UNREACHABLE_MSG("Out of scratch registers!"); +} + +static constexpr u32 MaxSavedRegisters = 3; +static pthread_key_t register_save_slots[MaxSavedRegisters]; +static std::once_flag register_save_init_flag; + +static_assert(sizeof(void*) == sizeof(u64), "Cannot fit a register inside a thread local storage slot."); + +static void InitializeRegisterSaveSlots() { + for (u32 i = 0; i < MaxSavedRegisters; i++) { + ASSERT_MSG(pthread_key_create(&register_save_slots[i], nullptr) == 0, + "Unable to allocate thread-local register save slot {}", i); + } +} + +static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list regs) { + ASSERT_MSG(regs.size() <= MaxSavedRegisters, "Not enough space to save {} registers.", regs.size()); + + std::call_once(register_save_init_flag, &InitializeRegisterSaveSlots); + + u32 index = 0; + for (const auto& reg : regs) { + const auto offset = reinterpret_cast(register_save_slots[index++] * sizeof(void*)); + + c.putSeg(Xbyak::util::gs); + c.mov(Xbyak::util::qword[offset], reg.cvt64()); + } +} + +static void RestoreRegisters(Xbyak::CodeGenerator& c, const std::initializer_list regs) { + ASSERT_MSG(regs.size() <= MaxSavedRegisters, "Not enough space to restore {} registers.", regs.size()); + + 
std::call_once(register_save_init_flag, &InitializeRegisterSaveSlots); + + u32 index = 0; + for (const auto& reg : regs) { + const auto offset = reinterpret_cast(register_save_slots[index++] * sizeof(void*)); + + c.putSeg(Xbyak::util::gs); + c.mov(reg.cvt64(), Xbyak::util::qword[offset]); + } +} + +static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + const auto src1 = ZydisToXbyakRegisterOperand(operands[1]); + const auto src2 = ZydisToXbyakOperand(operands[2]); + + const auto scratch = AllocateScratchRegister({&dst, &src1, src2.get()}, dst.getBit()); + + SaveRegisters(c, {scratch}); + + c.mov(scratch, src1); + c.not_(scratch); + c.and_(scratch, *src2); + c.mov(dst, scratch); + + RestoreRegisters(c, {scratch}); +} + +static void GenerateBEXTR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + const auto src = ZydisToXbyakOperand(operands[1]); + const auto start_len = ZydisToXbyakRegisterOperand(operands[2]); + + const Xbyak::Reg32e shift(Xbyak::Operand::RCX, static_cast(start_len.getBit())); + const auto scratch1 = AllocateScratchRegister({&dst, src.get(), &start_len, &shift}, dst.getBit()); + const auto scratch2 = AllocateScratchRegister({&dst, src.get(), &start_len, &shift, &scratch1}, dst.getBit()); + + if (dst.getIdx() == shift.getIdx()) { + SaveRegisters(c, {scratch1, scratch2}); + } else { + SaveRegisters(c, {scratch1, scratch2, shift}); + } + + c.mov(scratch1, *src); + if (shift.getIdx() != start_len.getIdx()) { + c.mov(shift, start_len); + } + + c.shr(scratch1, shift.cvt8()); + c.shr(shift, 8); + c.mov(scratch2, 1); + c.shl(scratch2, shift.cvt8()); + c.dec(scratch2); + + c.mov(dst, scratch1); + c.and_(dst, scratch2); + + if (dst.getIdx() == shift.getIdx()) { + RestoreRegisters(c, {scratch1, scratch2}); + } else { + RestoreRegisters(c, {scratch1, scratch2, shift}); + } +} + +static void 
GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + const auto src = ZydisToXbyakOperand(operands[1]); + + const auto scratch = AllocateScratchRegister({&dst, src.get()}, dst.getBit()); + + SaveRegisters(c, {scratch}); + + c.mov(scratch, *src); + c.neg(scratch); + c.and_(scratch, *src); + c.mov(dst, scratch); + + RestoreRegisters(c, {scratch}); +} + +static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + const auto src = ZydisToXbyakOperand(operands[1]); + + const auto scratch = AllocateScratchRegister({&dst, src.get()}, dst.getBit()); + + SaveRegisters(c, {scratch}); + + c.mov(scratch, *src); + c.dec(scratch); + c.xor_(scratch, *src); + c.mov(dst, scratch); + + RestoreRegisters(c, {scratch}); +} + +static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + const auto src = ZydisToXbyakOperand(operands[1]); + + const auto scratch = AllocateScratchRegister({&dst, src.get()}, dst.getBit()); + + SaveRegisters(c, {scratch}); + + c.mov(scratch, *src); + c.dec(scratch); + c.and_(scratch, *src); + c.mov(dst, scratch); + + RestoreRegisters(c, {scratch}); +} + +#endif + +using InstructionGenerator = void(*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&); +static const std::unordered_map InstructionGenerators = { +#ifdef __APPLE__ + // BMI1 instructions that are not supported by Rosetta 2 on Apple Silicon. + {ZYDIS_MNEMONIC_ANDN, &GenerateANDN}, + {ZYDIS_MNEMONIC_BEXTR, &GenerateBEXTR}, + {ZYDIS_MNEMONIC_BLSI, &GenerateBLSI}, + {ZYDIS_MNEMONIC_BLSMSK, &GenerateBLSMSK}, + {ZYDIS_MNEMONIC_BLSR, &GenerateBLSR}, +#endif +}; + +void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) { + if (InstructionGenerators.empty()) { + // Nothing to patch on this platform. 
+ return; + } + + ZydisDecoder instr_decoder; + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + ZydisDecoderInit(&instr_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); + + u8* code = reinterpret_cast(segment_addr); + u8* end = code + segment_size; + while (code < end) { + ZyanStatus status = + ZydisDecoderDecodeFull(&instr_decoder, code, end - code, &instruction, operands); + if (!ZYAN_SUCCESS(status)) { + code++; + continue; + } + + if (InstructionGenerators.contains(instruction.mnemonic)) { + LOG_DEBUG(Core, "Replacing instruction '{}' at: {}", ZydisMnemonicGetString(instruction.mnemonic), + fmt::ptr(code)); + + // Replace instruction with near jump to the trampoline. + static constexpr u32 NearJmpSize = 5; + ASSERT_MSG(instruction.length >= NearJmpSize, "Instruction {} with length {} is too short to replace at: {}", + ZydisMnemonicGetString(instruction.mnemonic), instruction.length, fmt::ptr(code)); + + auto patch = Xbyak::CodeGenerator(instruction.length, code); + patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR); + patch.nop(instruction.length - NearJmpSize); + + auto generator = InstructionGenerators.at(instruction.mnemonic); + generator(operands, c); + c.jmp(code + instruction.length); // Return to the following instruction. 
+ } + + code += instruction.length; + } +} + +} // namespace Core diff --git a/src/core/instruction_emulator.h b/src/core/instruction_emulator.h new file mode 100644 index 00000000..023fd62b --- /dev/null +++ b/src/core/instruction_emulator.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +namespace Xbyak { +class CodeGenerator; +} + +namespace Core { + +void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c); + +} // namespace Core diff --git a/src/core/libraries/kernel/event_flag/event_flag_obj.cpp b/src/core/libraries/kernel/event_flag/event_flag_obj.cpp index fe0f34b8..ed01d7cc 100644 --- a/src/core/libraries/kernel/event_flag/event_flag_obj.cpp +++ b/src/core/libraries/kernel/event_flag/event_flag_obj.cpp @@ -1,7 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include +#include +#include "core/libraries/error_codes.h" #include "event_flag_obj.h" namespace Libraries::Kernel { diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 7d5163cf..6bd88459 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/assert.h" #include "core/libraries/kernel/event_queue.h" diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index 16274236..c1574670 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -3,11 +3,13 @@ #include #include +#include #include #include "common/assert.h" #include "common/logging/log.h" +#include "common/polyfill_thread.h" #include "common/singleton.h" #include "common/thread.h" #include "core/file_format/psf.h" 
@@ -56,7 +58,7 @@ static void KernelServiceThread(std::stop_token stoken) { HLE_TRACE; { std::unique_lock lock{m_asio_req}; - cv_asio_req.wait(lock, stoken, [] { return asio_requests != 0; }); + Common::CondvarWait(cv_asio_req, lock, stoken, [] { return asio_requests != 0; }); } if (stoken.stop_requested()) { break; @@ -180,7 +182,7 @@ s64 PS4_SYSV_ABI ps4__write(int d, const void* buf, std::size_t nbytes) { int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, struct OrbisTimesec* st, unsigned long* dst_sec) { LOG_TRACE(Kernel, "Called"); - const auto* time_zone = std::chrono::current_zone(); + const auto* time_zone = date::current_zone(); auto info = time_zone->get_info(std::chrono::system_clock::now()); *local_time = info.offset.count() + info.save.count() * 60 + time; diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index 92fde960..5a437e19 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -74,7 +74,12 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE LOG_WARNING(Kernel_Vmm, "called searchStart = {:#x}, searchEnd = {:#x}, alignment = {:#x}", searchStart, searchEnd, alignment); auto* memory = Core::Memory::Instance(); - return memory->DirectQueryAvailable(searchStart, searchEnd, alignment, physAddrOut, sizeOut); + + PAddr physAddr; + s32 size = memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut); + *physAddrOut = static_cast(physAddr); + + return size; } s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 943098e8..e536412f 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -848,6 +848,37 @@ int PS4_SYSV_ABI 
posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, return result; } +#ifndef HAVE_PTHREAD_MUTEX_TIMEDLOCK +static int pthread_mutex_timedlock(pthread_mutex_t* mutex, const struct timespec* abstime) { + int rc; + while ((rc = pthread_mutex_trylock(mutex)) == EBUSY) { + struct timespec curr_time; + clock_gettime(CLOCK_REALTIME, &curr_time); + + s64 remaining_ns = 0; + remaining_ns += + (static_cast(abstime->tv_sec) - static_cast(curr_time.tv_sec)) * 1000000000L; + remaining_ns += static_cast(abstime->tv_nsec) - static_cast(curr_time.tv_nsec); + + if (remaining_ns <= 0) { + return ETIMEDOUT; + } + + struct timespec sleep_time; + sleep_time.tv_sec = 0; + if (remaining_ns < 5000000L) { + sleep_time.tv_nsec = remaining_ns; + } else { + sleep_time.tv_nsec = 5000000; + } + + nanosleep(&sleep_time, nullptr); + } + + return rc; +} +#endif + int PS4_SYSV_ABI scePthreadMutexTimedlock(ScePthreadMutex* mutex, u64 usec) { mutex = createMutex(mutex); if (mutex == nullptr) { @@ -1232,7 +1263,10 @@ int PS4_SYSV_ABI posix_pthread_create(ScePthread* thread, const ScePthreadAttr* using Destructor = void (*)(void*); int PS4_SYSV_ABI posix_pthread_key_create(u32* key, Destructor func) { - return pthread_key_create(key, func); + pthread_key_t thread_key; + int rc = pthread_key_create(&thread_key, func); + *key = static_cast(thread_key); + return rc; } int PS4_SYSV_ABI posix_pthread_setspecific(int key, const void* value) { diff --git a/src/core/libraries/kernel/threads/keys.cpp b/src/core/libraries/kernel/threads/keys.cpp index 78646833..cf5104d2 100644 --- a/src/core/libraries/kernel/threads/keys.cpp +++ b/src/core/libraries/kernel/threads/keys.cpp @@ -12,7 +12,10 @@ int PS4_SYSV_ABI scePthreadKeyCreate(OrbisPthreadKey* key, PthreadKeyDestructor return ORBIS_KERNEL_ERROR_EINVAL; } - int result = pthread_key_create(key, nullptr); + pthread_key_t thread_key; + int result = pthread_key_create(&thread_key, nullptr); + *key = static_cast(thread_key); + if (destructor) { auto 
thread = scePthreadSelf(); thread->key_destructors.emplace_back(*key, destructor); diff --git a/src/core/libraries/kernel/time_management.cpp b/src/core/libraries/kernel/time_management.cpp index d0c0a1e4..bc1617d3 100644 --- a/src/core/libraries/kernel/time_management.cpp +++ b/src/core/libraries/kernel/time_management.cpp @@ -15,6 +15,7 @@ #include "common/ntapi.h" #else +#include #include #include #endif diff --git a/src/core/memory.h b/src/core/memory.h index e584afab..a11af0ad 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -132,6 +132,11 @@ public: return total_flexible_size - flexible_usage; } + /// Returns the offset of the mapped virtual memory base from where it usually would be mapped. + [[nodiscard]] u64 VirtualOffset() noexcept { + return impl.VirtualBase() - SYSTEM_MANAGED_MIN; + } + PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, int memory_type); diff --git a/src/core/module.cpp b/src/core/module.cpp index 86893f61..70ae4425 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -7,6 +7,7 @@ #include "common/logging/log.h" #include "common/string_util.h" #include "core/aerolib/aerolib.h" +#include "core/instruction_emulator.h" #include "core/loader/dwarf.h" #include "core/memory.h" #include "core/module.h" @@ -84,8 +85,9 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { // Map module segments (and possible TLS trampolines) void** out_addr = reinterpret_cast(&base_virtual_addr); - memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize, - MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true); + memory->MapMemory(out_addr, memory->VirtualOffset() + LoadAddress, + aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite, + MemoryMapFlags::Fixed, VMAType::Code, name, true); LoadAddress += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); // Initialize trampoline generator. 
@@ -131,6 +133,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { add_segment(elf_pheader[i]); if (elf_pheader[i].p_flags & PF_EXEC) { PatchTLS(segment_addr, segment_file_size, c); + PatchInstructions(segment_addr, segment_file_size, c); } break; } diff --git a/src/core/tls.cpp b/src/core/tls.cpp index 0c2d973b..d220638e 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -8,7 +8,7 @@ #ifdef _WIN32 #include -#else +#elif !defined(__APPLE__) #include /* Definition of ARCH_* constants */ #include /* Definition of SYS_* constants */ #endif @@ -88,6 +88,51 @@ static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGe c.jmp(code + total_size); // Return to the instruction right after the mov. } +#elif defined(__APPLE__) + +static pthread_key_t slot = 0; +static std::once_flag slot_alloc_flag; + +static void AllocTcbKey() { + ASSERT(pthread_key_create(&slot, nullptr) == 0); +} + +void SetTcbBase(void* image_address) { + std::call_once(slot_alloc_flag, &AllocTcbKey); + ASSERT(pthread_setspecific(slot, image_address) == 0); +} + +Tcb* GetTcbBase() { + std::call_once(slot_alloc_flag, &AllocTcbKey); + return reinterpret_cast(pthread_getspecific(slot)); +} + +static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGenerator& c) { + using namespace Xbyak::util; + const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size; + + // Allocate slot in the process if not done already. + std::call_once(slot_alloc_flag, &AllocTcbKey); + + static constexpr u32 NearJmpSize = 5; + + // Replace fs read with gs read. + auto patch = Xbyak::CodeGenerator(total_size, code); + patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR); + patch.nop(total_size - NearJmpSize); + + // Write the trampoline. 
+ const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg); + + // The following logic is based on the Darwin implementation of _os_tsd_get_direct, used by pthread_getspecific + // https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L89-L96 + c.putSeg(gs); + c.mov(target_reg, qword[reinterpret_cast(slot * sizeof(void*))]); // Load the slot data. + + // Return to the instruction right after the mov. + c.jmp(code + total_size); +} + #else static u32 slot = 0; @@ -110,7 +155,6 @@ static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGe // Replace fs read with gs read. auto patch = Xbyak::CodeGenerator(total_size, code); - const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg); patch.putSeg(gs); } @@ -148,11 +192,6 @@ void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) { } ASSERT(offset == 0); - // Allocate slot in the process if not done already. - if (slot == 0) { - AllocTcbKey(); - } - // Replace bogus instruction prefix with nops if it exists. 
if (std::memcmp(code - BadPrefix.size(), BadPrefix.data(), sizeof(BadPrefix)) == 0) { auto patch = Xbyak::CodeGenerator(BadPrefix.size(), code - BadPrefix.size()); diff --git a/src/emulator.cpp b/src/emulator.cpp index 4e669e08..8a70044c 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -15,6 +15,7 @@ #include "common/logging/backend.h" #include "common/ntapi.h" #include "common/path_util.h" +#include "common/polyfill_thread.h" #include "common/singleton.h" #include "common/version.h" #include "core/file_sys/fs.h" diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index d4da268a..e67408b9 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -12,6 +12,10 @@ #include "input/controller.h" #include "sdl_window.h" +#ifdef __APPLE__ +#include +#endif + namespace Frontend { WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_) @@ -55,6 +59,9 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_ window_info.render_surface = SDL_GetProperty(SDL_GetWindowProperties(window), SDL_PROP_WINDOW_WAYLAND_SURFACE_POINTER, NULL); } +#elif defined(SDL_PLATFORM_MACOS) + window_info.type = WindowSystemType::Metal; + window_info.render_surface = SDL_Metal_GetLayer(SDL_Metal_CreateView(window)); #endif } diff --git a/src/sdl_window.h b/src/sdl_window.h index 13ee7864..6e14fbd0 100644 --- a/src/sdl_window.h +++ b/src/sdl_window.h @@ -19,6 +19,7 @@ enum class WindowSystemType : u8 { Windows, X11, Wayland, + Metal, }; struct WindowSystemInfo { diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 11567c1f..81e4093f 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "shader_recompiler/frontend/decode.h" #include 
"shader_recompiler/frontend/fetch_shader.h" diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index d36142ad..ab7ad241 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/debug.h" +#include "common/polyfill_thread.h" #include "common/thread.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -31,7 +32,7 @@ void Liverpool::Process(std::stop_token stoken) { while (!stoken.stop_requested()) { { std::unique_lock lk{submit_mutex}; - submit_cv.wait(lk, stoken, [this] { return num_submits != 0; }); + Common::CondvarWait(submit_cv, lk, stoken, [this] { return num_submits != 0; }); } if (stoken.stop_requested()) { break; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index bffec92b..b87c80ed 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -12,6 +12,7 @@ #include #include "common/assert.h" #include "common/bit_field.h" +#include "common/polyfill_thread.h" #include "common/types.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 4d80ae29..eded2de3 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -460,7 +460,7 @@ struct PM4CmdWriteData { template void Address(T addr) { - addr64 = reinterpret_cast(addr); + addr64 = static_cast(addr); } template diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 1a9a2185..430cb73d 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -292,10 +292,14 @@ bool Instance::CreateDevice() { const bool has_host_time_domain = std::find(time_domains.cbegin(), time_domains.cend(), vk::TimeDomainEXT::eQueryPerformanceCounter) != 
time_domains.cend(); -#else +#elif __linux__ const bool has_host_time_domain = std::find(time_domains.cbegin(), time_domains.cend(), vk::TimeDomainEXT::eClockMonotonicRaw) != time_domains.cend(); +#else + // Tracy limitation means only Windows and Linux can use host time domain. + // See https://github.com/shadps4-emu/tracy/blob/c6d779d78508514102fbe1b8eb28bda10d95bb2a/public/tracy/TracyVulkan.hpp#L384-L389 + const bool has_host_time_domain = false; #endif if (has_host_time_domain) { static constexpr std::string_view context_name{"vk_rasterizer"}; diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index cc2660d5..85bd57e2 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -16,6 +16,12 @@ class WindowSDL; VK_DEFINE_HANDLE(VmaAllocator) +#ifdef __APPLE__ +#define VULKAN_LIBRARY_NAME "libMoltenVK.dylib" +#else +#define VULKAN_LIBRARY_NAME +#endif + namespace Vulkan { class Instance { @@ -206,7 +212,7 @@ private: void CollectToolingInfo(); private: - vk::DynamicLoader dl; + vk::DynamicLoader dl{VULKAN_LIBRARY_NAME}; vk::UniqueInstance instance; vk::PhysicalDevice physical_device; vk::UniqueDevice device; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index cbac8fc0..1499d877 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -99,6 +99,17 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::WindowSDL& e UNREACHABLE(); } } +#elif defined(VK_USE_PLATFORM_METAL_EXT) + if (window_info.type == Frontend::WindowSystemType::Metal) { + const vk::MetalSurfaceCreateInfoEXT macos_ci = { + .pLayer = static_cast(window_info.render_surface), + }; + + if (instance.createMetalSurfaceEXT(&macos_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize MacOS surface"); + UNREACHABLE(); + } 
+ } #endif if (!surface) { @@ -135,6 +146,10 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window case Frontend::WindowSystemType::Wayland: extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); break; +#elif defined(VK_USE_PLATFORM_METAL_EXT) + case Frontend::WindowSystemType::Metal: + extensions.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME); + break; #endif default: LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 192bd9ce..55bb99cc 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -36,7 +36,14 @@ static TextureCache* g_texture_cache = nullptr; void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) { ucontext_t* ctx = reinterpret_cast(raw_context); const VAddr address = reinterpret_cast(info->si_addr); - if (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) { + +#ifdef __APPLE__ + const u32 err = ctx->uc_mcontext->__es.__err; +#else + const greg_t err = ctx->uc_mcontext.gregs[REG_ERR]; +#endif + + if (err & 0x2) { g_texture_cache->OnCpuWrite(address); } else { // Read not supported! @@ -69,9 +76,16 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& tile_manager{instance, scheduler} { #ifndef _WIN64 +#ifdef __APPLE__ + // Read-only memory write results in SIGBUS on Apple. 
+ static constexpr int SignalType = SIGBUS; +#else + static constexpr int SignalType = SIGSEGV; +#endif + sigset_t signal_mask; sigemptyset(&signal_mask); - sigaddset(&signal_mask, SIGSEGV); + sigaddset(&signal_mask, SignalType); using HandlerType = decltype(sigaction::sa_sigaction); @@ -79,7 +93,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK; guest_access_fault.sa_sigaction = &GuestFaultSignalHandler; guest_access_fault.sa_mask = signal_mask; - sigaction(SIGSEGV, &guest_access_fault, nullptr); + sigaction(SignalType, &guest_access_fault, nullptr); #else veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler); ASSERT_MSG(veh_handle, "Failed to register an exception handler");