Add initial macOS support.

This commit is contained in:
squidbus 2024-07-09 02:18:34 -07:00 committed by TheTurtle
parent 36d528743a
commit 66fa29059c
29 changed files with 523 additions and 26 deletions

3
.gitmodules vendored
View File

@ -58,3 +58,6 @@
[submodule "externals/ext-boost"]
path = externals/ext-boost
url = https://github.com/shadps4-emu/ext-boost.git
[submodule "externals/date"]
path = externals/date
url = https://github.com/HowardHinnant/date.git

View File

@ -6,6 +6,10 @@ cmake_minimum_required(VERSION 3.16.3)
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED True)
if(APPLE)
enable_language(OBJC)
endif()
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
@ -62,6 +66,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
find_package(Boost 1.84.0 CONFIG)
find_package(cryptopp 8.9.0 MODULE)
find_package(date 3.0.1 CONFIG)
find_package(fmt 10.2.1 CONFIG)
find_package(glslang 14.2.0 CONFIG)
find_package(magic_enum 0.9.6 CONFIG)
@ -75,6 +80,13 @@ find_package(xxHash 0.8.2 MODULE)
find_package(zlib-ng 2.2.0 MODULE)
find_package(Zydis 4.1.0 CONFIG)
# Detect whether pthread.h provides pthread_mutex_timedlock. When it does, the
# HAVE_PTHREAD_MUTEX_TIMEDLOCK preprocessor definition is made visible to the sources.
include(CheckSymbolExists)
check_symbol_exists(pthread_mutex_timedlock "pthread.h" HAVE_PTHREAD_MUTEX_TIMEDLOCK)
# Windows always has the function through winpthreads
if(HAVE_PTHREAD_MUTEX_TIMEDLOCK OR WIN32)
    # Use the dedicated definitions command instead of passing a raw -D compiler flag.
    add_compile_definitions(HAVE_PTHREAD_MUTEX_TIMEDLOCK)
endif()
add_subdirectory(externals)
include_directories(src)
@ -295,7 +307,9 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/file_format/splash.h
src/core/file_format/splash.cpp
src/core/file_sys/fs.cpp
src/core/file_sys/fs.h
src/core/file_sys/fs.h
src/core/instruction_emulator.cpp
src/core/instruction_emulator.h
src/core/loader.cpp
src/core/loader.h
src/core/loader/dwarf.cpp
@ -539,9 +553,15 @@ endif()
create_target_directory_groups(shadps4)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient date::date-tz)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3)
if (APPLE)
    # Link MoltenVK for Vulkan support.
    # find_library() only honours the REQUIRED keyword from CMake 3.18 onwards, and this
    # project accepts older versions, so the lookup result is verified explicitly.
    find_library(MOLTENVK MoltenVK)
    if (NOT MOLTENVK)
        message(FATAL_ERROR "MoltenVK was not found; it is required for Vulkan support on macOS.")
    endif()
    target_link_libraries(shadps4 PRIVATE "${MOLTENVK}")
endif()
if (NOT ENABLE_QT_GUI)
target_link_libraries(shadps4 PRIVATE SDL3::SDL3)
endif()

View File

@ -134,6 +134,13 @@ if (WIN32)
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
endif()
# date
# The bundled copy is only configured when no date::date-tz target exists yet
# (for example one provided by an earlier find_package(date) call).
if (NOT TARGET date::date-tz)
# NOTE(review): both options are consumed by the externals/date project; presumably they
# enable its time-zone library and make it read the OS time-zone database - confirm
# against that project's CMakeLists.txt.
option(BUILD_TZ_LIB "" ON)
option(USE_SYSTEM_TZ_DB "" ON)
add_subdirectory(date)
endif()
# Tracy
option(TRACY_ENABLE "" ON)
option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treated as a crash

1
externals/date vendored Submodule

@ -0,0 +1 @@
Subproject commit 1ead6715dec030d340a316c927c877a3c4e5a00c

View File

@ -9,6 +9,7 @@
#include "common/thread.h"
#ifdef __APPLE__
#include <mach/mach.h>
#include <pthread.h>
#elif defined(_WIN32)
#include <windows.h>
#include "common/string_util.h"

View File

@ -245,14 +245,20 @@ struct AddressSpace::Impl {
Impl() {
// Allocate virtual address placeholder for our address space.
void* hint_address = reinterpret_cast<void*>(SYSTEM_MANAGED_MIN);
#ifdef __APPLE__
constexpr int virtual_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
#else
constexpr int virtual_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED;
#endif
virtual_size = SystemSize + UserSize;
virtual_base = reinterpret_cast<u8*>(
mmap(hint_address, virtual_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, -1, 0));
mmap(hint_address, virtual_size, PROT_READ | PROT_WRITE, virtual_flags, -1, 0));
if (virtual_base == MAP_FAILED) {
LOG_CRITICAL(Kernel_Vmm, "mmap failed: {}", strerror(errno));
throw std::bad_alloc{};
}
#ifndef __APPLE__
madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
backing_fd = memfd_create("BackingDmem", 0);
@ -260,6 +266,15 @@ struct AddressSpace::Impl {
LOG_CRITICAL(Kernel_Vmm, "memfd_create failed: {}", strerror(errno));
throw std::bad_alloc{};
}
#else
const auto shm_path = fmt::format("/BackingDmem{}", getpid());
backing_fd = shm_open(shm_path.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
if (backing_fd < 0) {
LOG_CRITICAL(Kernel_Vmm, "shm_open failed: {}", strerror(errno));
throw std::bad_alloc{};
}
shm_unlink(shm_path.c_str());
#endif
// Defined to extend the file with zeros
int ret = ftruncate(backing_fd, BackingSize);

View File

@ -0,0 +1,291 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <memory>
#include <mutex>
#include <Zydis/Zydis.h>
#include <xbyak/xbyak.h>
#include "common/assert.h"
#include "common/types.h"
#include "instruction_emulator.h"
namespace Core {
/// Translates a Zydis general-purpose register into the matching Xbyak register.
/// Only the 32-bit (EAX..R15D) and 64-bit (RAX..R15) ranges are handled; the Xbyak index
/// is the register's offset from the start of its range. Any other register aborts.
static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) {
    const bool is_gpr32 = reg >= ZYDIS_REGISTER_EAX && reg <= ZYDIS_REGISTER_R15D;
    if (is_gpr32) {
        return Xbyak::Reg32(reg - ZYDIS_REGISTER_EAX);
    }

    const bool is_gpr64 = reg >= ZYDIS_REGISTER_RAX && reg <= ZYDIS_REGISTER_R15;
    if (is_gpr64) {
        return Xbyak::Reg64(reg - ZYDIS_REGISTER_RAX);
    }

    UNREACHABLE_MSG("Unsupported register: {}", static_cast<u32>(reg));
}
/// Converts a decoded operand that must be of register type into an Xbyak register.
/// Asserts when the operand is of any other type.
static Xbyak::Reg ZydisToXbyakRegisterOperand(const ZydisDecodedOperand& operand) {
    ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_REGISTER,
               "Expected register operand, got type: {}", static_cast<u32>(operand.type));

    const ZydisRegister zydis_register = operand.reg.value;
    return ZydisToXbyakRegister(zydis_register);
}
/// Builds an Xbyak address from a decoded memory operand by summing, in order, the base
/// register, the (optionally scaled) index register and the displacement, skipping every
/// component that is absent. Asserts when the operand is not a memory operand.
static Xbyak::Address ZydisToXbyakMemoryOperand(const ZydisDecodedOperand& operand) {
    ASSERT_MSG(operand.type == ZYDIS_OPERAND_TYPE_MEMORY,
               "Expected memory operand, got type: {}", static_cast<u32>(operand.type));

    const auto& mem = operand.mem;
    Xbyak::RegExp address{};

    if (mem.base != ZYDIS_REGISTER_NONE) {
        address = address + ZydisToXbyakRegister(mem.base);
    }

    if (mem.index != ZYDIS_REGISTER_NONE) {
        const auto index_reg = ZydisToXbyakRegister(mem.index);
        if (mem.scale == 0) {
            // No scale recorded: add the index register unscaled.
            address = address + index_reg;
        } else {
            address = address + index_reg * mem.scale;
        }
    }

    const bool has_displacement = mem.disp.size != 0 && mem.disp.value != 0;
    if (has_displacement) {
        address = address + mem.disp.value;
    }

    return Xbyak::util::ptr[address];
}
/// Converts a decoded register or memory operand into a heap-allocated Xbyak operand.
/// Operands of any other type abort.
static std::unique_ptr<Xbyak::Operand> ZydisToXbyakOperand(const ZydisDecodedOperand& operand) {
    if (operand.type == ZYDIS_OPERAND_TYPE_REGISTER) {
        return std::make_unique<Xbyak::Reg>(ZydisToXbyakRegisterOperand(operand));
    }
    if (operand.type == ZYDIS_OPERAND_TYPE_MEMORY) {
        return std::make_unique<Xbyak::Address>(ZydisToXbyakMemoryOperand(operand));
    }
    UNREACHABLE_MSG("Unsupported operand type: {}", static_cast<u32>(operand.type));
}
#ifdef __APPLE__
/// Reports whether the operand refers to the register with the given index: directly for
/// a register operand, or as base/index register for a memory operand. Other operand
/// kinds abort.
static bool OperandUsesRegister(const Xbyak::Operand* operand, int index) {
    if (operand->isREG()) {
        return operand->getIdx() == index;
    }
    if (!operand->isMEM()) {
        UNREACHABLE_MSG("Unsupported operand kind: {}", static_cast<u32>(operand->getKind()));
    }

    const Xbyak::RegExp& address = operand->getAddress().getRegExp();
    if (address.getBase().getIdx() == index) {
        return true;
    }
    return address.getIndex().getIdx() == index;
}
/// Returns true when any operand in the list uses the register with the given index.
static bool IsRegisterAllocated(const std::initializer_list<const Xbyak::Operand*>& allocated_registers, const int index) {
    for (const Xbyak::Operand* candidate : allocated_registers) {
        if (OperandUsesRegister(candidate, index)) {
            return true;
        }
    }
    return false;
}
/// Picks the lowest-numbered register in R8..R15 that none of the given operands use and
/// returns it at the requested bit width. Aborts when all of them are taken.
static Xbyak::Reg AllocateScratchRegister(const std::initializer_list<const Xbyak::Operand*> allocated_registers, const u32 bits) {
    int candidate = Xbyak::Operand::R8;
    while (candidate <= Xbyak::Operand::R15) {
        const bool in_use = IsRegisterAllocated(allocated_registers, candidate);
        if (!in_use) {
            return Xbyak::Reg32e(candidate, static_cast<int>(bits));
        }
        ++candidate;
    }
    UNREACHABLE_MSG("Out of scratch registers!");
}
/// Maximum number of registers a single generated sequence may save.
static constexpr u32 MaxSavedRegisters = 3;
/// One pthread key per save slot; the generated code addresses the slots through GS.
static pthread_key_t register_save_slots[MaxSavedRegisters];
/// Guards the one-time creation of the keys above.
static std::once_flag register_save_init_flag;

static_assert(sizeof(void*) == sizeof(u64), "Cannot fit a register inside a thread local storage slot.");

/// Creates the pthread keys backing the register save slots. Asserts if any key cannot
/// be created.
static void InitializeRegisterSaveSlots() {
    for (u32 i = 0; i < MaxSavedRegisters; i++) {
        // Keep the side-effecting call outside the assertion so that key creation never
        // depends on how the assertion macro evaluates its condition.
        const int result = pthread_key_create(&register_save_slots[i], nullptr);
        ASSERT_MSG(result == 0, "Unable to allocate thread-local register save slot {}", i);
    }
}
/// Emits code that stores each given register (as 64-bit) into its own thread-local save
/// slot, in list order. The slots are created on first use; at most MaxSavedRegisters
/// registers may be passed.
static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) {
    ASSERT_MSG(regs.size() <= MaxSavedRegisters, "Not enough space to save {} registers.", regs.size());
    std::call_once(register_save_init_flag, &InitializeRegisterSaveSlots);

    u32 slot_index = 0;
    for (auto it = regs.begin(); it != regs.end(); ++it, ++slot_index) {
        // GS-relative byte offset of the slot that belongs to this register.
        const auto slot_offset = reinterpret_cast<void*>(register_save_slots[slot_index] * sizeof(void*));
        c.putSeg(Xbyak::util::gs);
        c.mov(Xbyak::util::qword[slot_offset], it->cvt64());
    }
}
/// Emits code that reloads each given register (as 64-bit) from its thread-local save
/// slot, in list order. The list must match the one passed to SaveRegisters so that every
/// register reads back the slot it was written to.
static void RestoreRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) {
    ASSERT_MSG(regs.size() <= MaxSavedRegisters, "Not enough space to restore {} registers.", regs.size());
    std::call_once(register_save_init_flag, &InitializeRegisterSaveSlots);

    u32 slot_index = 0;
    for (auto it = regs.begin(); it != regs.end(); ++it, ++slot_index) {
        // GS-relative byte offset of the slot that belongs to this register.
        const auto slot_offset = reinterpret_cast<void*>(register_save_slots[slot_index] * sizeof(void*));
        c.putSeg(Xbyak::util::gs);
        c.mov(it->cvt64(), Xbyak::util::qword[slot_offset]);
    }
}
/// Emits a replacement for ANDN: dst = ~src1 & src2, computed in a scratch register that
/// is saved beforehand and restored afterwards.
static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
    const auto destination = ZydisToXbyakRegisterOperand(operands[0]);
    const auto inverted_source = ZydisToXbyakRegisterOperand(operands[1]);
    const auto masked_source = ZydisToXbyakOperand(operands[2]);

    const auto temp = AllocateScratchRegister({&destination, &inverted_source, masked_source.get()},
                                              destination.getBit());

    SaveRegisters(c, {temp});

    // temp = ~src1 & src2
    c.mov(temp, inverted_source);
    c.not_(temp);
    c.and_(temp, *masked_source);
    c.mov(destination, temp);

    RestoreRegisters(c, {temp});
}
// Emits a replacement for BEXTR. The generated code shifts the source right by the low
// byte of the third operand, builds a mask of (1 << n) - 1 where n is the next byte of
// that operand, and writes the masked value to the destination.
static void GenerateBEXTR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
const auto src = ZydisToXbyakOperand(operands[1]);
const auto start_len = ZydisToXbyakRegisterOperand(operands[2]);
// RCX at the width of start_len; its low byte (CL) supplies the variable shift counts.
const Xbyak::Reg32e shift(Xbyak::Operand::RCX, static_cast<int>(start_len.getBit()));
// Two scratch registers that collide with neither the operands, RCX, nor each other.
const auto scratch1 = AllocateScratchRegister({&dst, src.get(), &start_len, &shift}, dst.getBit());
const auto scratch2 = AllocateScratchRegister({&dst, src.get(), &start_len, &shift, &scratch1}, dst.getBit());
// RCX is clobbered below, so it is saved too unless it is the destination (which is
// overwritten with the result anyway).
if (dst.getIdx() == shift.getIdx()) {
SaveRegisters(c, {scratch1, scratch2});
} else {
SaveRegisters(c, {scratch1, scratch2, shift});
}
// Read the source before RCX is modified, since the source may reference it.
c.mov(scratch1, *src);
if (shift.getIdx() != start_len.getIdx()) {
c.mov(shift, start_len);
}
// scratch1 = src >> CL (low byte of start_len).
c.shr(scratch1, shift.cvt8());
// Move the second byte of start_len down into CL.
c.shr(shift, 8);
// scratch2 = (1 << CL) - 1.
c.mov(scratch2, 1);
c.shl(scratch2, shift.cvt8());
c.dec(scratch2);
// dst = scratch1 & scratch2.
c.mov(dst, scratch1);
c.and_(dst, scratch2);
// Restore with the same register list that was used for saving.
if (dst.getIdx() == shift.getIdx()) {
RestoreRegisters(c, {scratch1, scratch2});
} else {
RestoreRegisters(c, {scratch1, scratch2, shift});
}
}
/// Emits a replacement for BLSI: dst = -src & src, computed in a scratch register that
/// is saved beforehand and restored afterwards.
static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
    const auto destination = ZydisToXbyakRegisterOperand(operands[0]);
    const auto source = ZydisToXbyakOperand(operands[1]);

    const auto temp = AllocateScratchRegister({&destination, source.get()}, destination.getBit());

    SaveRegisters(c, {temp});

    // temp = -src & src
    c.mov(temp, *source);
    c.neg(temp);
    c.and_(temp, *source);
    c.mov(destination, temp);

    RestoreRegisters(c, {temp});
}
/// Emits a replacement for BLSMSK: dst = (src - 1) ^ src, computed in a scratch register
/// that is saved beforehand and restored afterwards.
static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
    const auto destination = ZydisToXbyakRegisterOperand(operands[0]);
    const auto source = ZydisToXbyakOperand(operands[1]);

    const auto temp = AllocateScratchRegister({&destination, source.get()}, destination.getBit());

    SaveRegisters(c, {temp});

    // temp = (src - 1) ^ src
    c.mov(temp, *source);
    c.dec(temp);
    c.xor_(temp, *source);
    c.mov(destination, temp);

    RestoreRegisters(c, {temp});
}
/// Emits a replacement for BLSR: dst = (src - 1) & src, computed in a scratch register
/// that is saved beforehand and restored afterwards.
static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
    const auto destination = ZydisToXbyakRegisterOperand(operands[0]);
    const auto source = ZydisToXbyakOperand(operands[1]);

    const auto temp = AllocateScratchRegister({&destination, source.get()}, destination.getBit());

    SaveRegisters(c, {temp});

    // temp = (src - 1) & src
    c.mov(temp, *source);
    c.dec(temp);
    c.and_(temp, *source);
    c.mov(destination, temp);

    RestoreRegisters(c, {temp});
}
#endif
// Signature of a replacement emitter: receives the decoded operands of the instruction
// being replaced and the code generator that the replacement sequence is written to.
using InstructionGenerator = void(*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
// Maps each mnemonic that must be replaced to its emitter. The table is empty on
// platforms other than Apple, in which case PatchInstructions returns immediately.
static const std::unordered_map<ZydisMnemonic, InstructionGenerator> InstructionGenerators = {
#ifdef __APPLE__
// BMI1 instructions that are not supported by Rosetta 2 on Apple Silicon.
{ZYDIS_MNEMONIC_ANDN, &GenerateANDN},
{ZYDIS_MNEMONIC_BEXTR, &GenerateBEXTR},
{ZYDIS_MNEMONIC_BLSI, &GenerateBLSI},
{ZYDIS_MNEMONIC_BLSMSK, &GenerateBLSMSK},
{ZYDIS_MNEMONIC_BLSR, &GenerateBLSR},
#endif
};
/// Scans the bytes in [segment_addr, segment_addr + segment_size) as x86-64 code and
/// replaces every instruction that has an entry in InstructionGenerators.
///
/// Each matched instruction is overwritten in place with a near jump to the current
/// position of `c`, padded with nops up to the original instruction length. The
/// replacement sequence is then emitted into `c`, followed by a jump back to the
/// instruction after the patched one. Bytes that fail to decode are skipped one at a time.
///
/// @param segment_addr Address of the first byte to scan.
/// @param segment_size Number of bytes to scan.
/// @param c            Code generator receiving the replacement code.
void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
    if (InstructionGenerators.empty()) {
        // Nothing to patch on this platform.
        return;
    }

    ZydisDecoder instr_decoder;
    ZydisDecodedInstruction instruction;
    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
    ZydisDecoderInit(&instr_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);

    u8* code = reinterpret_cast<u8*>(segment_addr);
    u8* end = code + segment_size;
    while (code < end) {
        ZyanStatus status =
            ZydisDecoderDecodeFull(&instr_decoder, code, end - code, &instruction, operands);
        if (!ZYAN_SUCCESS(status)) {
            code++;
            continue;
        }

        // Single lookup: the iterator serves both as the "needs patching" test and as the
        // handle to the generator.
        const auto generator_it = InstructionGenerators.find(instruction.mnemonic);
        if (generator_it != InstructionGenerators.end()) {
            LOG_DEBUG(Core, "Replacing instruction '{}' at: {}", ZydisMnemonicGetString(instruction.mnemonic),
                      fmt::ptr(code));

            // Replace instruction with near jump to the trampoline.
            static constexpr u32 NearJmpSize = 5;
            ASSERT_MSG(instruction.length >= NearJmpSize, "Instruction {} with length {} is too short to replace at: {}",
                       ZydisMnemonicGetString(instruction.mnemonic), instruction.length, fmt::ptr(code));

            auto patch = Xbyak::CodeGenerator(instruction.length, code);
            patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR);
            patch.nop(instruction.length - NearJmpSize);

            const InstructionGenerator generator = generator_it->second;
            generator(operands, c);
            c.jmp(code + instruction.length); // Return to the following instruction.
        }

        code += instruction.length;
    }
}
} // namespace Core

View File

@ -0,0 +1,14 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
namespace Xbyak {
class CodeGenerator;
}
namespace Core {
/// Scans segment_size bytes of code starting at segment_addr and replaces the
/// instructions that need a substitute on the current platform. The replacement code is
/// written through the code generator c.
void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);
} // namespace Core

View File

@ -1,7 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <core/libraries/error_codes.h>
#include <thread>
#include "core/libraries/error_codes.h"
#include "event_flag_obj.h"
namespace Libraries::Kernel {

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
#include "common/assert.h"
#include "core/libraries/kernel/event_queue.h"

View File

@ -3,11 +3,13 @@
#include <chrono>
#include <thread>
#include <date/tz.h>
#include <boost/asio/io_context.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/polyfill_thread.h"
#include "common/singleton.h"
#include "common/thread.h"
#include "core/file_format/psf.h"
@ -56,7 +58,7 @@ static void KernelServiceThread(std::stop_token stoken) {
HLE_TRACE;
{
std::unique_lock lock{m_asio_req};
cv_asio_req.wait(lock, stoken, [] { return asio_requests != 0; });
Common::CondvarWait(cv_asio_req, lock, stoken, [] { return asio_requests != 0; });
}
if (stoken.stop_requested()) {
break;
@ -180,7 +182,7 @@ s64 PS4_SYSV_ABI ps4__write(int d, const void* buf, std::size_t nbytes) {
int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time,
struct OrbisTimesec* st, unsigned long* dst_sec) {
LOG_TRACE(Kernel, "Called");
const auto* time_zone = std::chrono::current_zone();
const auto* time_zone = date::current_zone();
auto info = time_zone->get_info(std::chrono::system_clock::now());
*local_time = info.offset.count() + info.save.count() * 60 + time;

View File

@ -74,7 +74,12 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
LOG_WARNING(Kernel_Vmm, "called searchStart = {:#x}, searchEnd = {:#x}, alignment = {:#x}",
searchStart, searchEnd, alignment);
auto* memory = Core::Memory::Instance();
return memory->DirectQueryAvailable(searchStart, searchEnd, alignment, physAddrOut, sizeOut);
PAddr physAddr;
s32 size = memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut);
*physAddrOut = static_cast<u64>(physAddr);
return size;
}
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,

View File

@ -848,6 +848,37 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr,
return result;
}
#ifndef HAVE_PTHREAD_MUTEX_TIMEDLOCK
/// Fallback for platforms without pthread_mutex_timedlock. Repeatedly tries to take the
/// mutex, sleeping up to 5 ms between attempts, until it is acquired or the absolute
/// CLOCK_REALTIME deadline `abstime` has passed.
/// Returns the pthread_mutex_trylock result once it is no longer EBUSY, or ETIMEDOUT.
static int pthread_mutex_timedlock(pthread_mutex_t* mutex, const struct timespec* abstime) {
    for (;;) {
        const int lock_result = pthread_mutex_trylock(mutex);
        if (lock_result != EBUSY) {
            return lock_result;
        }

        struct timespec now;
        clock_gettime(CLOCK_REALTIME, &now);

        // Time left until the deadline, in nanoseconds.
        const s64 seconds_left = static_cast<s64>(abstime->tv_sec) - static_cast<s64>(now.tv_sec);
        const s64 nanos_left = static_cast<s64>(abstime->tv_nsec) - static_cast<s64>(now.tv_nsec);
        const s64 remaining_ns = seconds_left * 1000000000L + nanos_left;
        if (remaining_ns <= 0) {
            return ETIMEDOUT;
        }

        // Sleep for the remaining time, capped at 5 ms, before trying again.
        struct timespec pause;
        pause.tv_sec = 0;
        pause.tv_nsec = remaining_ns < 5000000L ? remaining_ns : 5000000;
        nanosleep(&pause, nullptr);
    }
}
#endif
int PS4_SYSV_ABI scePthreadMutexTimedlock(ScePthreadMutex* mutex, u64 usec) {
mutex = createMutex(mutex);
if (mutex == nullptr) {
@ -1232,7 +1263,10 @@ int PS4_SYSV_ABI posix_pthread_create(ScePthread* thread, const ScePthreadAttr*
using Destructor = void (*)(void*);
int PS4_SYSV_ABI posix_pthread_key_create(u32* key, Destructor func) {
return pthread_key_create(key, func);
pthread_key_t thread_key;
int rc = pthread_key_create(&thread_key, func);
*key = static_cast<u32>(thread_key);
return rc;
}
int PS4_SYSV_ABI posix_pthread_setspecific(int key, const void* value) {

View File

@ -12,7 +12,10 @@ int PS4_SYSV_ABI scePthreadKeyCreate(OrbisPthreadKey* key, PthreadKeyDestructor
return ORBIS_KERNEL_ERROR_EINVAL;
}
int result = pthread_key_create(key, nullptr);
pthread_key_t thread_key;
int result = pthread_key_create(&thread_key, nullptr);
*key = static_cast<OrbisPthreadKey>(thread_key);
if (destructor) {
auto thread = scePthreadSelf();
thread->key_destructors.emplace_back(*key, destructor);

View File

@ -15,6 +15,7 @@
#include "common/ntapi.h"
#else
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#endif

View File

@ -132,6 +132,11 @@ public:
return total_flexible_size - flexible_usage;
}
/// Returns the offset of the mapped virtual memory base from where it usually would be mapped.
[[nodiscard]] u64 VirtualOffset() noexcept {
return impl.VirtualBase() - SYSTEM_MANAGED_MIN;
}
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type);

View File

@ -7,6 +7,7 @@
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/aerolib/aerolib.h"
#include "core/instruction_emulator.h"
#include "core/loader/dwarf.h"
#include "core/memory.h"
#include "core/module.h"
@ -84,8 +85,9 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
// Map module segments (and possible TLS trampolines)
void** out_addr = reinterpret_cast<void**>(&base_virtual_addr);
memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize,
MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true);
memory->MapMemory(out_addr, memory->VirtualOffset() + LoadAddress,
aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite,
MemoryMapFlags::Fixed, VMAType::Code, name, true);
LoadAddress += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR);
// Initialize trampoline generator.
@ -131,6 +133,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
add_segment(elf_pheader[i]);
if (elf_pheader[i].p_flags & PF_EXEC) {
PatchTLS(segment_addr, segment_file_size, c);
PatchInstructions(segment_addr, segment_file_size, c);
}
break;
}

View File

@ -8,7 +8,7 @@
#ifdef _WIN32
#include <windows.h>
#else
#elif !defined(__APPLE__)
#include <asm/prctl.h> /* Definition of ARCH_* constants */
#include <sys/syscall.h> /* Definition of SYS_* constants */
#endif
@ -88,6 +88,51 @@ static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGe
c.jmp(code + total_size); // Return to the instruction right after the mov.
}
#elif defined(__APPLE__)
/// pthread key whose per-thread value holds the TCB base pointer.
static pthread_key_t slot = 0;
/// Guards the one-time creation of `slot`.
static std::once_flag slot_alloc_flag;

/// Creates the pthread key stored in `slot`. Asserts when creation fails.
static void AllocTcbKey() {
    // Keep the side-effecting call outside the assertion so that key creation never
    // depends on how the assertion macro evaluates its condition.
    const int result = pthread_key_create(&slot, nullptr);
    ASSERT(result == 0);
}
/// Stores image_address as the calling thread's value for the TCB key, creating the key
/// first if that has not happened yet. Asserts when the value cannot be stored.
void SetTcbBase(void* image_address) {
    std::call_once(slot_alloc_flag, &AllocTcbKey);
    // Keep the side-effecting call outside the assertion so that the store never depends
    // on how the assertion macro evaluates its condition.
    const int result = pthread_setspecific(slot, image_address);
    ASSERT(result == 0);
}
/// Returns the calling thread's value for the TCB key, creating the key first if that has
/// not happened yet.
Tcb* GetTcbBase() {
    std::call_once(slot_alloc_flag, &AllocTcbKey);
    void* const stored_value = pthread_getspecific(slot);
    return reinterpret_cast<Tcb*>(stored_value);
}
/// Replaces the TLS access at `code` (pattern plus immediate, as described by
/// tls_pattern) with a near jump to a trampoline emitted into `c`. The trampoline loads
/// the TCB key's per-thread value through GS into the pattern's target register and jumps
/// back to the instruction following the patched bytes.
static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGenerator& c) {
    using namespace Xbyak::util;
    const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size;

    // Allocate slot in the process if not done already.
    std::call_once(slot_alloc_flag, &AllocTcbKey);

    // Replace the original access with a near jump to the trampoline, padding the
    // remaining bytes with nops.
    static constexpr u32 NearJmpSize = 5;
    // The nop padding length below would wrap around if the patched region were shorter
    // than the jump itself.
    ASSERT(static_cast<u32>(total_size) >= NearJmpSize);
    auto patch = Xbyak::CodeGenerator(total_size, code);
    patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR);
    patch.nop(total_size - NearJmpSize);

    // Write the trampoline.
    const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg);
    // The following logic is based on the Darwin implementation of _os_tsd_get_direct, used by pthread_getspecific
    // https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L89-L96
    c.putSeg(gs);
    c.mov(target_reg, qword[reinterpret_cast<void*>(slot * sizeof(void*))]); // Load the slot data.

    // Return to the instruction right after the mov.
    c.jmp(code + total_size);
}
#else
static u32 slot = 0;
@ -110,7 +155,6 @@ static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGe
// Replace fs read with gs read.
auto patch = Xbyak::CodeGenerator(total_size, code);
const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg);
patch.putSeg(gs);
}
@ -148,11 +192,6 @@ void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
}
ASSERT(offset == 0);
// Allocate slot in the process if not done already.
if (slot == 0) {
AllocTcbKey();
}
// Replace bogus instruction prefix with nops if it exists.
if (std::memcmp(code - BadPrefix.size(), BadPrefix.data(), sizeof(BadPrefix)) == 0) {
auto patch = Xbyak::CodeGenerator(BadPrefix.size(), code - BadPrefix.size());

View File

@ -15,6 +15,7 @@
#include "common/logging/backend.h"
#include "common/ntapi.h"
#include "common/path_util.h"
#include "common/polyfill_thread.h"
#include "common/singleton.h"
#include "common/version.h"
#include "core/file_sys/fs.h"

View File

@ -12,6 +12,10 @@
#include "input/controller.h"
#include "sdl_window.h"
#ifdef __APPLE__
#include <SDL3/SDL_metal.h>
#endif
namespace Frontend {
WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_)
@ -55,6 +59,9 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_
window_info.render_surface = SDL_GetProperty(SDL_GetWindowProperties(window),
SDL_PROP_WINDOW_WAYLAND_SURFACE_POINTER, NULL);
}
#elif defined(SDL_PLATFORM_MACOS)
window_info.type = WindowSystemType::Metal;
window_info.render_surface = SDL_Metal_GetLayer(SDL_Metal_CreateView(window));
#endif
}

View File

@ -19,6 +19,7 @@ enum class WindowSystemType : u8 {
Windows,
X11,
Wayland,
Metal,
};
struct WindowSystemInfo {

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <boost/container/static_vector.hpp>
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h"

View File

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/debug.h"
#include "common/polyfill_thread.h"
#include "common/thread.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"
@ -31,7 +32,7 @@ void Liverpool::Process(std::stop_token stoken) {
while (!stoken.stop_requested()) {
{
std::unique_lock lk{submit_mutex};
submit_cv.wait(lk, stoken, [this] { return num_submits != 0; });
Common::CondvarWait(submit_cv, lk, stoken, [this] { return num_submits != 0; });
}
if (stoken.stop_requested()) {
break;

View File

@ -12,6 +12,7 @@
#include <queue>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/polyfill_thread.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/resource.h"

View File

@ -460,7 +460,7 @@ struct PM4CmdWriteData {
template <typename T>
void Address(T addr) {
addr64 = reinterpret_cast<u64>(addr);
addr64 = static_cast<u64>(addr);
}
template <typename T>

View File

@ -292,10 +292,14 @@ bool Instance::CreateDevice() {
const bool has_host_time_domain =
std::find(time_domains.cbegin(), time_domains.cend(),
vk::TimeDomainEXT::eQueryPerformanceCounter) != time_domains.cend();
#else
#elif __linux__
const bool has_host_time_domain =
std::find(time_domains.cbegin(), time_domains.cend(),
vk::TimeDomainEXT::eClockMonotonicRaw) != time_domains.cend();
#else
// Tracy limitation means only Windows and Linux can use host time domain.
// See https://github.com/shadps4-emu/tracy/blob/c6d779d78508514102fbe1b8eb28bda10d95bb2a/public/tracy/TracyVulkan.hpp#L384-L389
const bool has_host_time_domain = false;
#endif
if (has_host_time_domain) {
static constexpr std::string_view context_name{"vk_rasterizer"};

View File

@ -16,6 +16,12 @@ class WindowSDL;
VK_DEFINE_HANDLE(VmaAllocator)
#ifdef __APPLE__
#define VULKAN_LIBRARY_NAME "libMoltenVK.dylib"
#else
#define VULKAN_LIBRARY_NAME
#endif
namespace Vulkan {
class Instance {
@ -206,7 +212,7 @@ private:
void CollectToolingInfo();
private:
vk::DynamicLoader dl;
vk::DynamicLoader dl{VULKAN_LIBRARY_NAME};
vk::UniqueInstance instance;
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;

View File

@ -99,6 +99,17 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::WindowSDL& e
UNREACHABLE();
}
}
#elif defined(VK_USE_PLATFORM_METAL_EXT)
if (window_info.type == Frontend::WindowSystemType::Metal) {
const vk::MetalSurfaceCreateInfoEXT macos_ci = {
.pLayer = static_cast<const CAMetalLayer*>(window_info.render_surface),
};
if (instance.createMetalSurfaceEXT(&macos_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize MacOS surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
@ -135,6 +146,10 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#elif defined(VK_USE_PLATFORM_METAL_EXT)
case Frontend::WindowSystemType::Metal:
extensions.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");

View File

@ -36,7 +36,14 @@ static TextureCache* g_texture_cache = nullptr;
void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
if (ctx->uc_mcontext.gregs[REG_ERR] & 0x2) {
#ifdef __APPLE__
const u32 err = ctx->uc_mcontext->__es.__err;
#else
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
#endif
if (err & 0x2) {
g_texture_cache->OnCpuWrite(address);
} else {
// Read not supported!
@ -69,9 +76,16 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
tile_manager{instance, scheduler} {
#ifndef _WIN64
#ifdef __APPLE__
// Read-only memory write results in SIGBUS on Apple.
static constexpr int SignalType = SIGBUS;
#else
static constexpr int SignalType = SIGSEGV;
#endif
sigset_t signal_mask;
sigemptyset(&signal_mask);
sigaddset(&signal_mask, SIGSEGV);
sigaddset(&signal_mask, SignalType);
using HandlerType = decltype(sigaction::sa_sigaction);
@ -79,7 +93,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
guest_access_fault.sa_mask = signal_mask;
sigaction(SIGSEGV, &guest_access_fault, nullptr);
sigaction(SignalType, &guest_access_fault, nullptr);
#else
veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
ASSERT_MSG(veh_handle, "Failed to register an exception handler");