From c5d1d579b1e7b57fb70879d4bce0fd195139564a Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 15 Jun 2024 14:36:07 +0300 Subject: [PATCH] core: Many things (#194) * video_core: Add a few missed things * libkernel: More proper memory mapped files * memory: Fix tessellation buffer mapping * Cuphead work * sceKernelPollSema fix * clang format * fixed ngs2 lle loading and rtc lib * draft pthreads keys implementation * fixed return codes * return error code if sceKernelLoadStartModule module is invalid * re-enabled system modules and disable debug in libs.h * Improve linux support * fix windows build * kernel: Rework keys --------- Co-authored-by: georgemoralis --- CMakeLists.txt | 16 +- src/common/io_file.cpp | 28 +++ src/common/io_file.h | 3 + src/core/address_space.cpp | 30 +-- src/core/address_space.h | 7 +- src/core/aerolib/stubs.cpp | 5 +- src/core/file_sys/fs.cpp | 1 + src/core/libraries/disc_map/disc_map.cpp | 2 +- src/core/libraries/gnmdriver/gnmdriver.cpp | 5 +- .../kernel/event_flag/event_flag.cpp | 5 +- .../kernel/event_flag/event_flag_obj.cpp | 13 +- .../kernel/event_flag/event_flag_obj.h | 3 +- src/core/libraries/kernel/event_queue.h | 7 + src/core/libraries/kernel/event_queues.cpp | 19 ++ src/core/libraries/kernel/event_queues.h | 1 + src/core/libraries/kernel/file_system.cpp | 23 +++ src/core/libraries/kernel/libkernel.cpp | 154 +++++++++----- .../libraries/kernel/memory_management.cpp | 19 +- .../libraries/kernel/thread_management.cpp | 190 +++++++++++++----- src/core/libraries/kernel/thread_management.h | 42 +++- .../libraries/kernel/threads/kernel_threads.h | 53 ----- src/core/libraries/kernel/threads/keys.cpp | 47 +++++ .../{kernel_threads_rwlock.cpp => rwlock.cpp} | 42 +++- .../libraries/kernel/threads/semaphore.cpp | 180 +++++++++++++++++ src/core/libraries/kernel/threads/threads.h | 20 ++ src/core/libraries/kernel/time_management.cpp | 22 ++ src/core/libraries/libc/libc.cpp | 4 +- 
.../libraries/libc_internal/libc_internal.cpp | 26 ++- src/core/libraries/libs.h | 41 +++- src/core/libraries/rtc/rtc.cpp | 2 +- src/core/libraries/save_data/savedata.cpp | 2 +- src/core/libraries/system/userservice.cpp | 2 +- src/core/libraries/videoout/driver.cpp | 17 +- src/core/libraries/videoout/video_out.cpp | 13 +- src/core/libraries/videoout/video_out.h | 4 + src/core/linker.cpp | 69 +++++-- src/core/linker.h | 20 +- src/core/loader/dwarf.cpp | 137 +++++++++++++ src/core/loader/dwarf.h | 41 ++++ src/core/loader/elf.h | 9 + src/core/memory.cpp | 61 +++++- src/core/memory.h | 14 ++ src/core/module.cpp | 79 +++++++- src/core/module.h | 78 ++++++- src/core/tls.cpp | 3 +- src/core/tls.h | 2 +- src/emulator.cpp | 9 +- .../frontend/translate/scalar_alu.cpp | 7 +- .../frontend/translate/translate.cpp | 12 +- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_alu.cpp | 9 +- src/video_core/amdgpu/liverpool.cpp | 23 ++- src/video_core/amdgpu/liverpool.h | 15 +- src/video_core/amdgpu/resource.h | 3 +- .../renderer_vulkan/liverpool_to_vk.cpp | 7 + .../renderer_vulkan/renderer_vulkan.cpp | 6 + .../renderer_vulkan/renderer_vulkan.h | 1 + .../renderer_vulkan/vk_compute_pipeline.cpp | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 9 +- .../renderer_vulkan/vk_graphics_pipeline.h | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 11 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/image.h | 2 + src/video_core/texture_cache/image_view.cpp | 15 +- src/video_core/texture_cache/image_view.h | 3 +- .../texture_cache/texture_cache.cpp | 4 +- src/video_core/texture_cache/texture_cache.h | 5 + 67 files changed, 1406 insertions(+), 307 deletions(-) delete mode 100644 src/core/libraries/kernel/threads/kernel_threads.h create mode 100644 src/core/libraries/kernel/threads/keys.cpp rename src/core/libraries/kernel/threads/{kernel_threads_rwlock.cpp => rwlock.cpp} (93%) create mode 100644 src/core/libraries/kernel/threads/semaphore.cpp 
create mode 100644 src/core/libraries/kernel/threads/threads.h create mode 100644 src/core/loader/dwarf.cpp create mode 100644 src/core/loader/dwarf.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 232e7b47..d85e4da7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,8 +105,10 @@ set(KERNEL_LIB src/core/libraries/kernel/event_flag/event_flag.h src/core/libraries/kernel/event_flag/event_flag_obj.cpp src/core/libraries/kernel/event_flag/event_flag_obj.h - src/core/libraries/kernel/threads/kernel_threads.h - src/core/libraries/kernel/threads/kernel_threads_rwlock.cpp + src/core/libraries/kernel/threads/rwlock.cpp + src/core/libraries/kernel/threads/semaphore.cpp + src/core/libraries/kernel/threads/keys.cpp + src/core/libraries/kernel/threads/threads.h src/core/libraries/kernel/cpu_management.cpp src/core/libraries/kernel/cpu_management.h src/core/libraries/kernel/event_queue.cpp @@ -226,8 +228,6 @@ set(COMMON src/common/logging/backend.cpp src/common/debug.h src/common/disassembler.cpp src/common/disassembler.h - src/common/discord.cpp - src/common/discord.h src/common/endian.h src/common/enum.h src/common/io_file.cpp @@ -278,6 +278,8 @@ set(CORE src/core/aerolib/stubs.cpp src/core/file_sys/fs.h src/core/loader.cpp src/core/loader.h + src/core/loader/dwarf.cpp + src/core/loader/dwarf.h src/core/loader/elf.cpp src/core/loader/elf.h src/core/loader/symbols_resolver.h @@ -500,13 +502,17 @@ else() ${VIDEO_CORE} ${EMULATOR} src/main.cpp + src/emulator.cpp + src/emulator.h + src/sdl_window.h + src/sdl_window.cpp ) endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak Tracy::TracyClient) -target_link_libraries(shadps4 PRIVATE discord-rpc boost vma sirit vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared) +target_link_libraries(shadps4 PRIVATE boost vma sirit vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared) if (NOT ENABLE_QT_GUI) target_link_libraries(shadps4 
PRIVATE SDL3-shared) diff --git a/src/common/io_file.cpp b/src/common/io_file.cpp index 71c40a16..077c8a31 100644 --- a/src/common/io_file.cpp +++ b/src/common/io_file.cpp @@ -3,6 +3,9 @@ #include +#include "common/alignment.h" +#include "common/assert.h" +#include "common/error.h" #include "common/io_file.h" #include "common/logging/log.h" #include "common/path_util.h" @@ -10,6 +13,7 @@ #ifdef _WIN32 #include #include +#include #else #include #endif @@ -204,6 +208,30 @@ void IOFile::Close() { } file = nullptr; + +#ifdef _WIN64 + if (file_mapping) { + CloseHandle(std::bit_cast(file_mapping)); + } +#endif +} + +uintptr_t IOFile::GetFileMapping() { + if (file_mapping) { + return file_mapping; + } +#ifdef _WIN64 + const int fd = fileno(file); + HANDLE hfile = reinterpret_cast(_get_osfhandle(fd)); + HANDLE mapping = + CreateFileMapping2(hfile, NULL, FILE_MAP_READ, PAGE_READONLY, SEC_COMMIT, 0, NULL, NULL, 0); + file_mapping = std::bit_cast(mapping); + ASSERT_MSG(file_mapping, "{}", Common::GetLastErrorMsg()); + return file_mapping; +#else + file_mapping = fileno(file); + return file_mapping; +#endif } std::string IOFile::ReadString(size_t length) const { diff --git a/src/common/io_file.h b/src/common/io_file.h index 6beeb794..e57a5a78 100644 --- a/src/common/io_file.h +++ b/src/common/io_file.h @@ -100,6 +100,8 @@ public: return file != nullptr; } + uintptr_t GetFileMapping(); + void Open(const std::filesystem::path& path, FileAccessMode mode, FileType type = FileType::BinaryFile, FileShareFlag flag = FileShareFlag::ShareReadOnly); @@ -212,6 +214,7 @@ private: FileType file_type{}; std::FILE* file = nullptr; + uintptr_t file_mapping = 0; }; } // namespace Common::FS diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 79a6a6f4..f8302641 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -85,7 +85,7 @@ struct AddressSpace::Impl { } } - void* Map(VAddr virtual_addr, PAddr phys_addr, size_t size, ULONG prot) { + void* 
Map(VAddr virtual_addr, PAddr phys_addr, size_t size, ULONG prot, uintptr_t fd = 0) { const auto it = placeholders.find(virtual_addr); ASSERT_MSG(it != placeholders.end(), "Cannot map already mapped region"); ASSERT_MSG(virtual_addr >= it->lower() && virtual_addr + size <= it->upper(), @@ -117,8 +117,9 @@ struct AddressSpace::Impl { // Perform the map. void* ptr = nullptr; if (phys_addr != -1) { - ptr = MapViewOfFile3(backing_handle, process, reinterpret_cast(virtual_addr), - phys_addr, size, MEM_REPLACE_PLACEHOLDER, prot, nullptr, 0); + HANDLE backing = fd ? reinterpret_cast(fd) : backing_handle; + ptr = MapViewOfFile3(backing, process, reinterpret_cast(virtual_addr), phys_addr, + size, MEM_REPLACE_PLACEHOLDER, prot, nullptr, 0); } else { ptr = VirtualAlloc2(process, reinterpret_cast(virtual_addr), size, @@ -128,9 +129,9 @@ struct AddressSpace::Impl { return ptr; } - void Unmap(VAddr virtual_addr, PAddr phys_addr, size_t size) { + void Unmap(VAddr virtual_addr, size_t size, bool has_backing) { bool ret; - if (phys_addr != -1) { + if (has_backing) { ret = UnmapViewOfFile2(process, reinterpret_cast(virtual_addr), MEM_PRESERVE_PLACEHOLDER); } else { @@ -254,13 +255,14 @@ struct AddressSpace::Impl { m_free_regions.insert({start_addr, start_addr + virtual_size}); } - void* Map(VAddr virtual_addr, PAddr phys_addr, size_t size, PosixPageProtection prot) { + void* Map(VAddr virtual_addr, PAddr phys_addr, size_t size, PosixPageProtection prot, + int fd = -1) { m_free_regions.subtract({virtual_addr, virtual_addr + size}); - const int fd = phys_addr != -1 ? backing_fd : -1; - const int host_offset = phys_addr != -1 ? phys_addr : 0; + const int handle = phys_addr != -1 ? (fd == -1 ? backing_fd : fd) : -1; + const off_t host_offset = phys_addr != -1 ? phys_addr : 0; const int flag = phys_addr != -1 ? 
MAP_SHARED : (MAP_ANONYMOUS | MAP_PRIVATE); - void* ret = mmap(reinterpret_cast(virtual_addr), size, prot, MAP_FIXED | flag, fd, - host_offset); + void* ret = mmap(reinterpret_cast(virtual_addr), size, prot, MAP_FIXED | flag, + handle, host_offset); ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); return ret; } @@ -323,8 +325,12 @@ void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr ph is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); } -void AddressSpace::Unmap(VAddr virtual_addr, size_t size, PAddr phys_addr) { - return impl->Unmap(virtual_addr, phys_addr, size); +void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, uintptr_t fd) { + return impl->Map(virtual_addr, offset, size, fd ? PAGE_READONLY : PAGE_READWRITE, fd); +} + +void AddressSpace::Unmap(VAddr virtual_addr, size_t size, bool has_backing) { + return impl->Unmap(virtual_addr, size, has_backing); } void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission perms) { diff --git a/src/core/address_space.h b/src/core/address_space.h index ccaeb199..5bb553ae 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -20,7 +20,7 @@ DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) constexpr VAddr SYSTEM_RESERVED = 0x800000000ULL; constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; -constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000ULL; +constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; constexpr VAddr USER_MIN = 0x1000000000ULL; constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL; @@ -62,8 +62,11 @@ public: void* Map(VAddr virtual_addr, size_t size, u64 alignment = 0, PAddr phys_addr = -1, bool exec = false); + /// Memory maps a specified file descriptor. + void* MapFile(VAddr virtual_addr, size_t size, size_t offset, uintptr_t fd); + /// Unmaps specified virtual memory area. 
- void Unmap(VAddr virtual_addr, size_t size, PAddr phys_addr); + void Unmap(VAddr virtual_addr, size_t size, bool has_backing); void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms); diff --git a/src/core/aerolib/stubs.cpp b/src/core/aerolib/stubs.cpp index 3c35829f..5e2d55cd 100644 --- a/src/core/aerolib/stubs.cpp +++ b/src/core/aerolib/stubs.cpp @@ -19,7 +19,7 @@ namespace Core::AeroLib { // and to longer compile / CI times // // Must match STUBS_LIST define -constexpr u32 MAX_STUBS = 512; +constexpr u32 MAX_STUBS = 1024; u64 UnresolvedStub() { LOG_ERROR(Core, "Returning zero to {}", __builtin_return_address(0)); @@ -60,8 +60,9 @@ static u32 UsedStubEntries; #define XREP_128(x) XREP_64(x) XREP_64(x + 64) #define XREP_256(x) XREP_128(x) XREP_128(x + 128) #define XREP_512(x) XREP_256(x) XREP_256(x + 256) +#define XREP_1024(x) XREP_512(x) XREP_512(x + 512) -#define STUBS_LIST XREP_512(0) +#define STUBS_LIST XREP_1024(0) static u64 (*stub_handlers[MAX_STUBS])() = {STUBS_LIST}; diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 6a99a808..c42a0d3e 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -97,4 +97,5 @@ File* HandleTable::getFile(const std::string& host_name) { } return nullptr; } + } // namespace Core::FileSys diff --git a/src/core/libraries/disc_map/disc_map.cpp b/src/core/libraries/disc_map/disc_map.cpp index 7fd5ed3c..79f4acb3 100644 --- a/src/core/libraries/disc_map/disc_map.cpp +++ b/src/core/libraries/disc_map/disc_map.cpp @@ -45,4 +45,4 @@ void RegisterlibSceDiscMap(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("5+vOlukvkfg", "libSceDiscMap", 1, "libSceDiscMap", 1, 1, Func_E7EBCE96E92F91F8); }; -} // namespace Libraries::DiscMap \ No newline at end of file +} // namespace Libraries::DiscMap diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 71040f32..fb829ce5 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ 
b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -66,6 +66,9 @@ static inline u32* WriteTrailingNop(u32* cmdbuf) { s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { LOG_TRACE(Lib_GnmDriver, "called"); + if (id != SceKernelEvent::Type::GfxEop) { + return ORBIS_OK; + } ASSERT_MSG(id == SceKernelEvent::Type::GfxEop); if (!eq) { @@ -986,7 +989,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, // a check for zero in the upper part of shader address. In our case, the address is a // pointer to a stack memory, so the check will likely fail. To workaround it we will // repeat set shader functionality here as it is trivial. - cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], vs_regs[1]); // SPI_SHADER_PGM_LO_VS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL diff --git a/src/core/libraries/kernel/event_flag/event_flag.cpp b/src/core/libraries/kernel/event_flag/event_flag.cpp index 0ca95768..f4877844 100644 --- a/src/core/libraries/kernel/event_flag/event_flag.cpp +++ b/src/core/libraries/kernel/event_flag/event_flag.cpp @@ -73,7 +73,8 @@ int PS4_SYSV_ABI sceKernelCloseEventFlag() { return ORBIS_OK; } int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) { - LOG_ERROR(Kernel_Event, "(STUBBED) called"); + LOG_ERROR(Kernel_Event, "called"); + ef->Clear(bitPattern); return ORBIS_OK; } int PS4_SYSV_ABI sceKernelCancelEventFlag(OrbisKernelEventFlag ef, u64 setPattern, @@ -195,4 +196,4 @@ void RegisterKernelEventFlag(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("IOnSvHzqu6A", "libkernel", 1, "libkernel", 1, 1, sceKernelSetEventFlag); LIB_FUNCTION("JTvBflhYazQ", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEventFlag); } -} // namespace 
Libraries::Kernel \ No newline at end of file +} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/event_flag/event_flag_obj.cpp b/src/core/libraries/kernel/event_flag/event_flag_obj.cpp index 66f0d3d7..46429d40 100644 --- a/src/core/libraries/kernel/event_flag/event_flag_obj.cpp +++ b/src/core/libraries/kernel/event_flag/event_flag_obj.cpp @@ -90,4 +90,15 @@ void EventFlagInternal::Set(u64 bits) { m_cond_var.notify_all(); } -} // namespace Libraries::Kernel \ No newline at end of file +void EventFlagInternal::Clear(u64 bits) { + std::unique_lock lock{m_mutex}; + while (m_status != Status::Set) { + m_mutex.unlock(); + std::this_thread::sleep_for(std::chrono::microseconds(10)); + m_mutex.lock(); + } + + m_bits &= bits; +} + +} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/event_flag/event_flag_obj.h b/src/core/libraries/kernel/event_flag/event_flag_obj.h index efeee088..fe50be69 100644 --- a/src/core/libraries/kernel/event_flag/event_flag_obj.h +++ b/src/core/libraries/kernel/event_flag/event_flag_obj.h @@ -25,6 +25,7 @@ public: int Wait(u64 bits, WaitMode wait_mode, ClearMode clear_mode, u64* result, u32* ptr_micros); int Poll(u64 bits, WaitMode wait_mode, ClearMode clear_mode, u64* result); void Set(u64 bits); + void Clear(u64 bits); private: enum class Status { Set, Canceled, Deleted }; @@ -38,4 +39,4 @@ private: QueueMode m_queue_mode = QueueMode::Fifo; u64 m_bits = 0; }; -} // namespace Libraries::Kernel \ No newline at end of file +} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/event_queue.h b/src/core/libraries/kernel/event_queue.h index 745a0ac0..8fc5f5d3 100644 --- a/src/core/libraries/kernel/event_queue.h +++ b/src/core/libraries/kernel/event_queue.h @@ -34,6 +34,13 @@ constexpr s16 EVFILT_GPU_SYSTEM_EXCEPTION = -21; constexpr s16 EVFILT_GPU_DBGGC_EV = -22; constexpr s16 EVFILT_SYSCOUNT = 22; +constexpr u16 EV_ONESHOT = 0x10; // only report one occurrence +constexpr u16 EV_CLEAR = 0x20; 
// clear event state after reporting +constexpr u16 EV_RECEIPT = 0x40; // force EV_ERROR on success, data=0 +constexpr u16 EV_DISPATCH = 0x80; // disable event after reporting +constexpr u16 EV_SYSFLAGS = 0xF000; // reserved by system +constexpr u16 EV_FLAG1 = 0x2000; // filter-specific flag + class EqueueInternal; struct EqueueEvent; diff --git a/src/core/libraries/kernel/event_queues.cpp b/src/core/libraries/kernel/event_queues.cpp index 8b45f53e..e2b151a8 100644 --- a/src/core/libraries/kernel/event_queues.cpp +++ b/src/core/libraries/kernel/event_queues.cpp @@ -88,6 +88,24 @@ int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) { event.event.ident = id; event.event.filter = Kernel::EVFILT_USER; event.event.udata = 0; + event.event.flags = 1; + event.event.fflags = 0; + event.event.data = 0; + + return eq->addEvent(event); +} + +int PS4_SYSV_ABI sceKernelAddUserEventEdge(SceKernelEqueue eq, int id) { + if (eq == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + Kernel::EqueueEvent event{}; + event.isTriggered = false; + event.event.ident = id; + event.event.filter = Kernel::EVFILT_USER; + event.event.udata = 0; + event.event.flags = 0x21; event.event.fflags = 0; event.event.data = 0; @@ -111,4 +129,5 @@ int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id) { eq->removeEvent(id); return ORBIS_OK; } + } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/event_queues.h b/src/core/libraries/kernel/event_queues.h index 9e0f2eb4..8c1521b7 100644 --- a/src/core/libraries/kernel/event_queues.h +++ b/src/core/libraries/kernel/event_queues.h @@ -18,5 +18,6 @@ void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev); int PS4_SYSV_ABI sceKernelTriggerUserEvent(SceKernelEqueue eq, int id, void* udata); int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id); int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id); +int PS4_SYSV_ABI sceKernelAddUserEventEdge(SceKernelEqueue eq, int 
id); } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index a454c275..537f959b 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -31,6 +31,10 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { bool direct = (flags & ORBIS_KERNEL_O_DIRECT) != 0; bool directory = (flags & ORBIS_KERNEL_O_DIRECTORY) != 0; + if (std::string_view{path} == "/dev/console" || std::string_view{path} == "/dev/deci_tty6") { + return ORBIS_OK; + } + if (directory) { LOG_ERROR(Kernel_Fs, "called on directory"); } else { @@ -89,6 +93,11 @@ int PS4_SYSV_ABI sceKernelClose(int d) { return SCE_OK; } +int PS4_SYSV_ABI posix_close(int d) { + ASSERT(sceKernelClose(d) == 0); + return ORBIS_OK; +} + size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) { if (buf == nullptr) { return SCE_KERNEL_ERROR_EFAULT; @@ -282,10 +291,22 @@ int PS4_SYSV_ABI sceKernelFStat(int fd, OrbisKernelStat* sb) { return ORBIS_OK; } +int PS4_SYSV_ABI posix_fstat(int fd, OrbisKernelStat* sb) { + return sceKernelFStat(fd, sb); +} + +s32 PS4_SYSV_ABI sceKernelFsync(int fd) { + auto* h = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + file->f.Flush(); + return ORBIS_OK; +} + void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("1G3lF1Gg1k8", "libkernel", 1, "libkernel", 1, 1, sceKernelOpen); LIB_FUNCTION("wuCroIGjt2g", "libScePosix", 1, "libkernel", 1, 1, posix_open); LIB_FUNCTION("UK2Tl2DWUns", "libkernel", 1, "libkernel", 1, 1, sceKernelClose); + LIB_FUNCTION("bY-PO6JhzhQ", "libScePosix", 1, "libkernel", 1, 1, posix_close); LIB_FUNCTION("4wSze92BhLI", "libkernel", 1, "libkernel", 1, 1, sceKernelWrite); LIB_FUNCTION("+WRlkKjZvag", "libkernel", 1, "libkernel", 1, 1, _readv); @@ -295,10 +316,12 @@ void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("1-LFLmRFxxM", "libkernel", 1, 
"libkernel", 1, 1, sceKernelMkdir); LIB_FUNCTION("eV9wAD2riIA", "libkernel", 1, "libkernel", 1, 1, sceKernelStat); LIB_FUNCTION("kBwCPsYX-m4", "libkernel", 1, "libkernel", 1, 1, sceKernelFStat); + LIB_FUNCTION("mqQMh1zPPT8", "libScePosix", 1, "libkernel", 1, 1, posix_fstat); LIB_FUNCTION("E6ao34wPw+U", "libScePosix", 1, "libkernel", 1, 1, posix_stat); LIB_FUNCTION("+r3rMFwItV4", "libkernel", 1, "libkernel", 1, 1, sceKernelPread); LIB_FUNCTION("uWyW3v98sU4", "libkernel", 1, "libkernel", 1, 1, sceKernelCheckReachability); + LIB_FUNCTION("fTx66l5iWIA", "libkernel", 1, "libkernel", 1, 1, sceKernelFsync); // openOrbis (to check if it is valid out of OpenOrbis LIB_FUNCTION("6c3rCVE-fTU", "libkernel", 1, "libkernel", 1, 1, diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index 8b47606a..8b093d25 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -62,62 +62,35 @@ size_t PS4_SYSV_ABI _writev(int fd, const struct iovec* iov, int iovcn) { return total_written; } -static thread_local int libc_error; +static thread_local int libc_error{}; int* PS4_SYSV_ABI __Error() { return &libc_error; } -#define PROT_READ 0x1 -#define PROT_WRITE 0x2 - -int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, off_t offset, +int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, size_t offset, void** res) { -#ifdef _WIN64 - LOG_INFO(Kernel_Vmm, "called"); - if (prot > 3) { - LOG_ERROR(Kernel_Vmm, "prot = {} not supported", prot); + LOG_INFO(Kernel_Vmm, "called addr = {}, len = {}, prot = {}, flags = {}, fd = {}, offset = {}", + fmt::ptr(addr), len, prot, flags, fd, offset); + auto* h = Common::Singleton::Instance(); + auto* memory = Core::Memory::Instance(); + const auto mem_prot = static_cast(prot); + const auto mem_flags = static_cast(flags); + if (fd == -1) { + return memory->MapMemory(res, std::bit_cast(addr), len, mem_prot, mem_flags, + 
Core::VMAType::Flexible); + } else { + const uintptr_t handle = h->GetFile(fd)->f.GetFileMapping(); + return memory->MapFile(res, std::bit_cast(addr), len, mem_prot, mem_flags, handle, + offset); } - DWORD flProtect; - if (prot & PROT_WRITE) { - flProtect = PAGE_READWRITE; - } - off_t end = len + offset; - HANDLE mmap_fd, h; - if (fd == -1) - mmap_fd = INVALID_HANDLE_VALUE; - else - mmap_fd = (HANDLE)_get_osfhandle(fd); - h = CreateFileMapping(mmap_fd, NULL, flProtect, 0, end, NULL); - int k = GetLastError(); - if (NULL == h) - return -1; - DWORD dwDesiredAccess; - if (prot & PROT_WRITE) - dwDesiredAccess = FILE_MAP_WRITE; - else - dwDesiredAccess = FILE_MAP_READ; - void* ret = MapViewOfFile(h, dwDesiredAccess, 0, offset, len); - if (ret == NULL) { - CloseHandle(h); - ret = nullptr; - } - *res = ret; - return 0; -#else - void* result = mmap(addr, len, prot, flags, fd, offset); - if (result != MAP_FAILED) { - *res = result; - return 0; - } - std::abort(); -#endif } -PS4_SYSV_ABI void* posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) { +void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) { void* ptr; LOG_INFO(Kernel_Vmm, "posix mmap redirect to sceKernelMmap\n"); // posix call the difference is that there is a different behaviour when it doesn't return 0 or // SCE_OK + const VAddr ret_addr = (VAddr)__builtin_return_address(0); int result = sceKernelMmap(addr, len, prot, flags, fd, offset, &ptr); ASSERT(result == 0); return ptr; @@ -201,11 +174,19 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg auto* mnt = Common::Singleton::Instance(); const auto path = mnt->GetHostFile(moduleFileName); - // Load PRX module. + // Load PRX module and relocate any modules that import it. 
auto* linker = Common::Singleton::Instance(); u32 handle = linker->LoadModule(path); + if (handle == -1) { + return ORBIS_KERNEL_ERROR_EINVAL; + } auto* module = linker->GetModule(handle); - linker->Relocate(module); + linker->RelocateAnyImports(module); + + // If the new module has a TLS image, trigger its load when TlsGetAddr is called. + if (module->tls.image_size != 0) { + linker->AdvanceGenerationCounter(); + } // Retrieve and verify proc param according to libkernel. u64* param = module->GetProcParam(); @@ -225,10 +206,84 @@ s32 PS4_SYSV_ABI sceKernelDlsym(s32 handle, const char* symbol, void** addrp) { return ORBIS_OK; } +static constexpr size_t ORBIS_DBG_MAX_NAME_LENGTH = 256; + +struct OrbisModuleInfoForUnwind { + u64 st_size; + std::array name; + VAddr eh_frame_hdr_addr; + VAddr eh_frame_addr; + u64 eh_frame_size; + VAddr seg0_addr; + u64 seg0_size; +}; + +s32 PS4_SYSV_ABI sceKernelGetModuleInfoForUnwind(VAddr addr, int flags, + OrbisModuleInfoForUnwind* info) { + if (flags >= 3) { + std::memset(info, 0, sizeof(OrbisModuleInfoForUnwind)); + return SCE_KERNEL_ERROR_EINVAL; + } + if (!info) { + return ORBIS_KERNEL_ERROR_EFAULT; + } + if (info->st_size <= sizeof(OrbisModuleInfoForUnwind)) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + // Find module that contains specified address. + LOG_INFO(Lib_Kernel, "called addr = {:#x}, flags = {:#x}", addr, flags); + auto* linker = Common::Singleton::Instance(); + auto* module = linker->FindByAddress(addr); + const auto mod_info = module->GetModuleInfoEx(); + + // Fill in module info. 
+ info->name = mod_info.name; + info->eh_frame_hdr_addr = mod_info.eh_frame_hdr_addr; + info->eh_frame_addr = mod_info.eh_frame_addr; + info->eh_frame_size = mod_info.eh_frame_size; + info->seg0_addr = mod_info.segments[0].address; + info->seg0_size = mod_info.segments[0].size; + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelGetModuleInfoFromAddr(VAddr addr, int flags, + Core::OrbisKernelModuleInfoEx* info) { + LOG_INFO(Lib_Kernel, "called addr = {:#x}, flags = {:#x}", addr, flags); + auto* linker = Common::Singleton::Instance(); + auto* module = linker->FindByAddress(addr); + *info = module->GetModuleInfoEx(); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelDebugRaiseException() { + UNREACHABLE(); + return 0; +} + +char PS4_SYSV_ABI _is_signal_return(s64* param_1) { + char cVar1; + + if (((*param_1 != 0x48006a40247c8d48ULL) || (param_1[1] != 0x50f000001a1c0c7ULL)) || + (cVar1 = '\x01', (param_1[2] & 0xffffffU) != 0xfdebf4)) { + cVar1 = ((*(u64*)((s64)param_1 + -5) & 0xffffffffff) == 0x50fca8949) * '\x02'; + } + return cVar1; +} + +int PS4_SYSV_ABI sceKernelGetCpumode() { + return 5; +} + +void PS4_SYSV_ABI sched_yield() { + return std::this_thread::yield(); +} + void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { // obj LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard); // memory + LIB_FUNCTION("OMDRKKAZ8I4", "libkernel", 1, "libkernel", 1, 1, sceKernelDebugRaiseException); LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory); LIB_FUNCTION("B+vc2AO2Zrc", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateMainDirectMemory); @@ -248,6 +303,9 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { _sceKernelRtldSetApplicationHeapAPI); LIB_FUNCTION("wzvqT4UqKX8", "libkernel", 1, "libkernel", 1, 1, sceKernelLoadStartModule); LIB_FUNCTION("LwG8g3niqwA", "libkernel", 1, "libkernel", 1, 1, sceKernelDlsym); + LIB_FUNCTION("RpQJJVKTiFM", "libkernel", 1, "libkernel", 1, 1, 
sceKernelGetModuleInfoForUnwind); + LIB_FUNCTION("f7KBOafysXo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetModuleInfoFromAddr); + LIB_FUNCTION("VOx8NGmHXTs", "libkernel", 1, "libkernel", 1, 1, sceKernelGetCpumode); // equeue LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue); @@ -255,6 +313,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue); LIB_FUNCTION("vz+pg2zdopI", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventUserData); LIB_FUNCTION("4R6-OvI2cEA", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEvent); + LIB_FUNCTION("WDszmSbWuDk", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEventEdge); LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent); LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent); @@ -263,11 +322,13 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Ou3iL1abvng", "libkernel", 1, "libkernel", 1, 1, stack_chk_fail); LIB_FUNCTION("9BcDykPmo1I", "libkernel", 1, "libkernel", 1, 1, __Error); LIB_FUNCTION("BPE9s9vQQXo", "libkernel", 1, "libkernel", 1, 1, posix_mmap); + LIB_FUNCTION("BPE9s9vQQXo", "libScePosix", 1, "libkernel", 1, 1, posix_mmap); LIB_FUNCTION("YSHRBRLn2pI", "libkernel", 1, "libkernel", 1, 1, _writev); LIB_FUNCTION("959qrazPIrg", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcParam); LIB_FUNCTION("-o5uEDpN+oY", "libkernel", 1, "libkernel", 1, 1, sceKernelConvertUtcToLocaltime); LIB_FUNCTION("WB66evu8bsU", "libkernel", 1, "libkernel", 1, 1, sceKernelGetCompiledSdkVersion); LIB_FUNCTION("DRuBt2pvICk", "libkernel", 1, "libkernel", 1, 1, ps4__read); + LIB_FUNCTION("crb5j7mkk1c", "libkernel", 1, "libkernel", 1, 1, _is_signal_return); Libraries::Kernel::fileSystemSymbolsRegister(sym); Libraries::Kernel::timeSymbolsRegister(sym); @@ -278,6 +339,7 @@ void 
LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("NWtTN10cJzE", "libSceLibcInternalExt", 1, "libSceLibcInternal", 1, 1, sceLibcHeapGetTraceInfo); LIB_FUNCTION("FxVZqBAA7ks", "libkernel", 1, "libkernel", 1, 1, ps4__write); + LIB_FUNCTION("6XG4B33N09g", "libScePosix", 1, "libkernel", 1, 1, sched_yield); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index 56961a72..4683440a 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -80,6 +80,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, size_t infoSize) { LOG_INFO(Kernel_Vmm, "called addr = {}, flags = {:#x}", fmt::ptr(addr), flags); + if (!addr) { + return SCE_KERNEL_ERROR_EACCES; + } auto* memory = Core::Memory::Instance(); return memory->VirtualQuery(std::bit_cast(addr), flags, info); } @@ -87,10 +90,10 @@ s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtual int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, int flags, s64 directMemoryStart, u64 alignment, const char* name) { - LOG_INFO( - Kernel_Vmm, - "len = {:#x}, prot = {:#x}, flags = {:#x}, directMemoryStart = {:#x}, alignment = {:#x}", - len, prot, flags, directMemoryStart, alignment); + LOG_INFO(Kernel_Vmm, + "addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, directMemoryStart = {:#x}, " + "alignment = {:#x}", + fmt::ptr(*addr), len, prot, flags, directMemoryStart, alignment); if (len == 0 || !Common::Is16KBAligned(len)) { LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 16KB aligned!"); @@ -117,11 +120,7 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i int PS4_SYSV_ABI sceKernelMapDirectMemory(void** addr, u64 len, int prot, int flags, s64 
directMemoryStart, u64 alignment) { - LOG_INFO(Kernel_Vmm, - "redirected to sceKernelMapNamedDirectMemory: " - "len = {:#x}, prot = {:#x}, flags = {:#x}, directMemoryStart = {:#x}, alignment = " - "{:#x}", - len, prot, flags, directMemoryStart, alignment); + LOG_INFO(Kernel_Vmm, "called, redirected to sceKernelMapNamedDirectMemory"); return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, ""); } @@ -169,7 +168,7 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, size_t infoSize) { - LOG_WARNING(Kernel_Vmm, "called"); + LOG_WARNING(Kernel_Vmm, "called offset = {:#x}, flags = {:#x}", offset, flags); auto* memory = Core::Memory::Instance(); return memory->DirectMemoryQuery(offset, flags == 1, query_info); } diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index ce83f5d3..946c248c 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -3,18 +3,20 @@ #include #include +#include #include "common/assert.h" +#include "common/error.h" #include "common/logging/log.h" #include "common/singleton.h" #include "common/thread.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/thread_management.h" +#include "core/libraries/kernel/threads/threads.h" #include "core/libraries/libs.h" #include "core/linker.h" #ifdef _WIN64 #include #endif -#include "core/libraries/kernel/threads/kernel_threads.h" namespace Libraries::Kernel { @@ -40,6 +42,7 @@ void init_pthreads() { scePthreadRwlockattrInit(&default_rwattr); g_pthread_cxt->setDefaultRwattr(default_rwattr); + g_pthread_cxt->setPthreadKeys(new PthreadKeys); g_pthread_cxt->SetPthreadPool(new PThreadPool); } @@ -136,9 +139,8 @@ int PS4_SYSV_ABI scePthreadAttrGetdetachstate(const ScePthreadAttr* attr, int* s return 
SCE_KERNEL_ERROR_EINVAL; } - // int result = pthread_attr_getdetachstate(&(*attr)->p, state); + // int result = pthread_attr_getdetachstate(&(*attr)->pth_attr, state); int result = 0; - *state = ((*attr)->detached ? PTHREAD_CREATE_DETACHED : PTHREAD_CREATE_JOINABLE); switch (*state) { @@ -172,12 +174,9 @@ int PS4_SYSV_ABI scePthreadAttrSetdetachstate(ScePthreadAttr* attr, int detachst UNREACHABLE_MSG("Invalid detachstate: {}", detachstate); } - // int result = pthread_attr_setdetachstate(&(*attr)->pth_attr, pstate); doesn't seem to work - // correctly + // int result = pthread_attr_setdetachstate(&(*attr)->pth_attr, pstate); int result = 0; - (*attr)->detached = (pstate == PTHREAD_CREATE_DETACHED); - return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL; } @@ -244,7 +243,6 @@ int PS4_SYSV_ABI scePthreadAttrSetschedparam(ScePthreadAttr* attr, } int PS4_SYSV_ABI scePthreadAttrGetschedpolicy(const ScePthreadAttr* attr, int* policy) { - if (policy == nullptr || attr == nullptr || *attr == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } @@ -273,16 +271,26 @@ int PS4_SYSV_ABI scePthreadAttrSetschedpolicy(ScePthreadAttr* attr, int policy) return SCE_KERNEL_ERROR_EINVAL; } - int ppolicy = SCHED_OTHER; // winpthreads only supports SCHED_OTHER - if (policy != SCHED_OTHER) { - LOG_ERROR(Kernel_Pthread, "policy={} not supported by winpthreads\n", policy); + int ppolicy = SCHED_OTHER; + switch (policy) { + case 0: + ppolicy = SCHED_OTHER; + break; + case 1: + ppolicy = SCHED_FIFO; + break; + case 3: + ppolicy = SCHED_OTHER; + break; + default: + UNREACHABLE(); } + (*attr)->policy = policy; - int result = pthread_attr_setschedpolicy(&(*attr)->pth_attr, ppolicy); - return result == 0 ? 
SCE_OK : SCE_KERNEL_ERROR_EINVAL; } + ScePthread PS4_SYSV_ABI scePthreadSelf() { return g_pthread_self; } @@ -296,7 +304,6 @@ int PS4_SYSV_ABI scePthreadAttrSetaffinity(ScePthreadAttr* pattr, } (*pattr)->affinity = mask; - return SCE_OK; } @@ -389,16 +396,18 @@ int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpuma return result; } -void* createMutex(void* addr) { - if (addr == nullptr || *static_cast(addr) != nullptr) { +ScePthreadMutex* createMutex(ScePthreadMutex* addr) { + if (addr == nullptr || *addr != nullptr) { return addr; } static std::mutex mutex; std::scoped_lock lk{mutex}; - auto vaddr = reinterpret_cast(addr); - + if (*addr != nullptr) { + return addr; + } + const VAddr vaddr = reinterpret_cast(addr); std::string name = fmt::format("mutex{:#x}", vaddr); - scePthreadMutexInit(static_cast(addr), nullptr, name.c_str()); + scePthreadMutexInit(addr, nullptr, name.c_str()); return addr; } @@ -466,7 +475,7 @@ int PS4_SYSV_ABI scePthreadMutexattrInit(ScePthreadMutexattr* attr) { int result = pthread_mutexattr_init(&(*attr)->pth_mutex_attr); - result = (result == 0 ? scePthreadMutexattrSettype(attr, 2) : result); + result = (result == 0 ? scePthreadMutexattrSettype(attr, 1) : result); result = (result == 0 ? scePthreadMutexattrSetprotocol(attr, 0) : result); switch (result) { @@ -517,22 +526,20 @@ int PS4_SYSV_ABI scePthreadMutexattrSetprotocol(ScePthreadMutexattr* attr, int p UNREACHABLE_MSG("Invalid protocol: {}", protocol); } - int result = 0; // pthread_mutexattr_setprotocol(&(*attr)->p, pprotocol); //it appears that - // pprotocol has issues in winpthreads + int result = pthread_mutexattr_setprotocol(&(*attr)->pth_mutex_attr, pprotocol); (*attr)->pprotocol = pprotocol; - return result == 0 ? 
SCE_OK : SCE_KERNEL_ERROR_EINVAL; } -int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) { - mutex = static_cast(createMutex(mutex)); +int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) { + mutex = createMutex(mutex); if (mutex == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } int result = pthread_mutex_lock(&(*mutex)->pth_mutex); if (result != 0) { - LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); + LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result); } switch (result) { case 0: @@ -547,20 +554,20 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) { return SCE_KERNEL_ERROR_EINVAL; } } + int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) { - mutex = static_cast(createMutex(mutex)); + mutex = createMutex(mutex); if (mutex == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } int result = pthread_mutex_unlock(&(*mutex)->pth_mutex); if (result != 0) { - LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); + LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result); } switch (result) { case 0: return SCE_OK; - case EINVAL: return SCE_KERNEL_ERROR_EINVAL; case EPERM: @@ -571,7 +578,6 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) { } int PS4_SYSV_ABI scePthreadMutexattrDestroy(ScePthreadMutexattr* attr) { - int result = pthread_mutexattr_destroy(&(*attr)->pth_mutex_attr); delete *attr; @@ -587,12 +593,16 @@ int PS4_SYSV_ABI scePthreadMutexattrDestroy(ScePthreadMutexattr* attr) { } } -void* createCond(void* addr) { - if (addr == nullptr || *static_cast(addr) != nullptr) { +ScePthreadCond* createCond(ScePthreadCond* addr) { + if (addr == nullptr || *addr != nullptr) { return addr; } - auto vaddr = reinterpret_cast(addr); - + static std::mutex mutex; + std::scoped_lock lk{mutex}; + if (*addr != nullptr) { + return addr; + } + const VAddr vaddr = reinterpret_cast(addr); std::string name = fmt::format("cond{:#x}", vaddr); 
scePthreadCondInit(static_cast(addr), nullptr, name.c_str()); return addr; @@ -652,22 +662,20 @@ int PS4_SYSV_ABI scePthreadCondattrInit(ScePthreadCondattr* attr) { } int PS4_SYSV_ABI scePthreadCondBroadcast(ScePthreadCond* cond) { - LOG_INFO(Kernel_Pthread, "called"); - cond = static_cast(createCond(cond)); - + cond = createCond(cond); if (cond == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } int result = pthread_cond_broadcast(&(*cond)->cond); - LOG_INFO(Kernel_Pthread, "name={}, result={}", (*cond)->name, result); + LOG_TRACE(Kernel_Pthread, "called name={}, result={}", (*cond)->name, result); return (result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL); } int PS4_SYSV_ABI scePthreadCondTimedwait(ScePthreadCond* cond, ScePthreadMutex* mutex, u64 usec) { - cond = static_cast(createCond(cond)); + cond = createCond(cond); if (cond == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } @@ -873,6 +881,11 @@ int PS4_SYSV_ABI scePthreadAttrGet(ScePthread thread, ScePthreadAttr* attr) { static void cleanup_thread(void* arg) { auto* thread = static_cast(arg); + for (const auto& [key, destructor] : thread->key_destructors) { + if (void* value = pthread_getspecific(key); value != nullptr) { + destructor(value); + } + } thread->is_almost_done = true; } @@ -891,7 +904,7 @@ static void* run_thread(void* arg) { } int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr, - pthreadEntryFunc start_routine, void* arg, const char* name) { + PthreadEntryFunc start_routine, void* arg, const char* name) { if (thread == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } @@ -1031,9 +1044,9 @@ int PS4_SYSV_ABI scePthreadCondSignal(ScePthreadCond* cond) { } int PS4_SYSV_ABI scePthreadCondWait(ScePthreadCond* cond, ScePthreadMutex* mutex) { - if (cond == nullptr || *cond == nullptr) { - // return SCE_KERNEL_ERROR_EINVAL; - cond = static_cast(createCond(cond)); // check this. Kero Blaster. 
+ cond = createCond(cond); + if (cond == nullptr) { + return SCE_KERNEL_ERROR_EINVAL; } if (mutex == nullptr || *mutex == nullptr) { return SCE_KERNEL_ERROR_EINVAL; @@ -1073,7 +1086,7 @@ int PS4_SYSV_ABI scePthreadCondattrDestroy(ScePthreadCondattr* attr) { } int PS4_SYSV_ABI scePthreadMutexTrylock(ScePthreadMutex* mutex) { - + mutex = createMutex(mutex); if (mutex == nullptr) { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -1124,7 +1137,7 @@ int PS4_SYSV_ABI posix_sched_get_priority_min() { int PS4_SYSV_ABI posix_pthread_mutex_trylock(ScePthreadMutex* mutex) { int result = scePthreadMutexTrylock(mutex); if (result < 0) { - UNREACHABLE(); + // UNREACHABLE(); } return result; } @@ -1153,7 +1166,7 @@ int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int det } int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthreadAttr* attr, - pthreadEntryFunc start_routine, void* arg, + PthreadEntryFunc start_routine, void* arg, const char* name) { LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate: name = {}", name); @@ -1167,15 +1180,30 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthre return result; } -int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) { - return pthread_once(reinterpret_cast(once_control), init_routine); -} int PS4_SYSV_ABI posix_pthread_create(ScePthread* thread, const ScePthreadAttr* attr, - pthreadEntryFunc start_routine, void* arg) { - LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate"); + PthreadEntryFunc start_routine, void* arg) { + return posix_pthread_create_name_np(thread, attr, start_routine, arg, "NoName"); +} - int result = scePthreadCreate(thread, attr, start_routine, arg, ""); - if (result != 0) { +using Destructor = void (*)(void*); + +int PS4_SYSV_ABI posix_pthread_key_create(u32* key, Destructor func) { + return pthread_key_create(key, func); +} + +int PS4_SYSV_ABI 
posix_pthread_setspecific(int key, const void* value) { + return pthread_setspecific(key, value); +} + +void* PS4_SYSV_ABI posix_pthread_getspecific(int key) { + return pthread_getspecific(key); +} + +int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadCondattr* attr, + const char* name) { + // LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit"); + int result = scePthreadCondInit(cond, attr, name); + if (result < 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP ? result + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER; @@ -1183,7 +1211,47 @@ int PS4_SYSV_ABI posix_pthread_create(ScePthread* thread, const ScePthreadAttr* } return result; } + +int PS4_SYSV_ABI posix_pthread_setcancelstate(int state, int* oldstate) { + return pthread_setcancelstate(state, oldstate); +} + +int PS4_SYSV_ABI posix_pthread_detach(ScePthread thread) { + return pthread_detach(thread->pth); +} + +int PS4_SYSV_ABI posix_sem_init(sem_t* sem, int pshared, unsigned int value) { + return sem_init(sem, pshared, value); +} + +int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) { + return sem_wait(sem); +} + +int PS4_SYSV_ABI posix_sem_post(sem_t* sem) { + return sem_post(sem); +} + +int PS4_SYSV_ABI scePthreadGetschedparam(ScePthread thread, int* policy, + SceKernelSchedParam* param) { + return pthread_getschedparam(thread->pth, policy, param); +} + +int PS4_SYSV_ABI scePthreadSetschedparam(ScePthread thread, int policy, + const SceKernelSchedParam* param) { + return pthread_setschedparam(thread->pth, policy, param); +} + +int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) { + return pthread_once(reinterpret_cast(once_control), init_routine); +} + void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate); + LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, 
posix_pthread_cond_init); + LIB_FUNCTION("mqULNdimTn0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_key_create); + LIB_FUNCTION("0-KXaS70xy4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_getspecific); + LIB_FUNCTION("WrOLvHU0yQM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setspecific); LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy); LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate); LIB_FUNCTION("eXbUSpEaTsA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetinheritsched); @@ -1194,7 +1262,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach); LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual); LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal); - LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join); + LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join); LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSelf); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); @@ -1202,6 +1270,9 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("3qxgM4ezETA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetaffinity); LIB_FUNCTION("8+s5BzZjxSg", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetaffinity); LIB_FUNCTION("x1X76arYMxU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGet); + LIB_FUNCTION("FXPWHNk8Of0", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetschedparam); + LIB_FUNCTION("P41kTWUS3EI", "libkernel", 1, "libkernel", 1, 1, scePthreadGetschedparam); + LIB_FUNCTION("oIRFTjoILbg", "libkernel", 1, "libkernel", 1, 1, scePthreadSetschedparam); LIB_FUNCTION("UTXzJbWhhTE", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstacksize); 
LIB_FUNCTION("vNe1w4diLCs", "libkernel", 1, "libkernel", 1, 1, __tls_get_addr); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create); @@ -1210,6 +1281,8 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("6UgtwV+0zb4", "libkernel", 1, "libkernel", 1, 1, scePthreadCreate); LIB_FUNCTION("T72hz6ffq08", "libkernel", 1, "libkernel", 1, 1, scePthreadYield); LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack); + LIB_FUNCTION("Ru36fiTtJzA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstackaddr); + LIB_FUNCTION("-fA+7ZlGDQs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstacksize); LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, scePthreadOnce); // mutex calls @@ -1259,10 +1332,17 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { posix_pthread_attr_setdetachstate); LIB_FUNCTION("zHchY8ft5pk", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_destroy); LIB_FUNCTION("Jmi+9w9u0E4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create_name_np); + LIB_FUNCTION("OxhIB8LB-PQ", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create); + LIB_FUNCTION("+U1R4WtXvoc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_detach); LIB_FUNCTION("CBNtXOoef-E", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_max); LIB_FUNCTION("m0iS6jNsXds", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_min); + LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init); + LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait); + LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post); // libs - ThreadsRwlockSymbolsRegister(sym); + RwlockSymbolsRegister(sym); + SemaphoreSymbolsRegister(sym); + KeySymbolsRegister(sym); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/thread_management.h 
b/src/core/libraries/kernel/thread_management.h index c8357e68..1909ffd3 100644 --- a/src/core/libraries/kernel/thread_management.h +++ b/src/core/libraries/kernel/thread_management.h @@ -28,6 +28,7 @@ struct PthreadCondInternal; struct PthreadCondAttrInternal; struct PthreadRwInternal; struct PthreadRwLockAttrInternal; +class PthreadKeys; using SceKernelSchedParam = ::sched_param; using ScePthread = PthreadInternal*; @@ -38,20 +39,24 @@ using ScePthreadCond = PthreadCondInternal*; using ScePthreadCondattr = PthreadCondAttrInternal*; using OrbisPthreadRwlock = PthreadRwInternal*; using OrbisPthreadRwlockattr = PthreadRwLockAttrInternal*; +using OrbisPthreadKey = u32; -using pthreadEntryFunc = PS4_SYSV_ABI void* (*)(void*); +using PthreadKeyDestructor = PS4_SYSV_ABI void (*)(void*); +using PthreadEntryFunc = PS4_SYSV_ABI void* (*)(void*); struct PthreadInternal { u8 reserved[4096]; std::string name; pthread_t pth; ScePthreadAttr attr; - pthreadEntryFunc entry; + PthreadEntryFunc entry; void* arg; std::atomic_bool is_started; std::atomic_bool is_detached; std::atomic_bool is_almost_done; std::atomic_bool is_free; + using Destructor = std::pair; + std::vector key_destructors; }; struct PthreadAttrInternal { @@ -106,6 +111,30 @@ private: std::mutex m_mutex; }; +class PthreadKeys { +public: + PthreadKeys() {} + virtual ~PthreadKeys() {} + + bool CreateKey(int* key, PthreadKeyDestructor destructor); + bool GetKey(int key, int thread_id, void** data); + bool SetKey(int key, int thread_id, void* data); + +private: + struct Map { + int thread_id = -1; + void* data = nullptr; + }; + + struct Key { + bool used = false; + PthreadKeyDestructor destructor = nullptr; + std::vector specific_values; + }; + + std::mutex m_mutex; + Key m_keys[256]; +}; class PThreadCxt { public: ScePthreadMutexattr* getDefaultMutexattr() { @@ -138,6 +167,12 @@ public: void setDefaultRwattr(OrbisPthreadRwlockattr attr) { m_default_Rwattr = attr; } + PthreadKeys* getPthreadKeys() { + return 
m_pthread_keys; + } + void setPthreadKeys(PthreadKeys* keys) { + m_pthread_keys = keys; + } private: ScePthreadMutexattr m_default_mutexattr = nullptr; @@ -145,6 +180,7 @@ private: ScePthreadAttr m_default_attr = nullptr; PThreadPool* m_pthread_pool = nullptr; OrbisPthreadRwlockattr m_default_Rwattr = nullptr; + PthreadKeys* m_pthread_keys = nullptr; }; void init_pthreads(); @@ -161,7 +197,7 @@ int PS4_SYSV_ABI scePthreadAttrSetaffinity(ScePthreadAttr* pattr, const /*SceKernelCpumask*/ u64 mask); int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpumask*/ u64 mask); int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr, - pthreadEntryFunc start_routine, void* arg, const char* name); + PthreadEntryFunc start_routine, void* arg, const char* name); /*** * Mutex calls diff --git a/src/core/libraries/kernel/threads/kernel_threads.h b/src/core/libraries/kernel/threads/kernel_threads.h deleted file mode 100644 index a2a2eb48..00000000 --- a/src/core/libraries/kernel/threads/kernel_threads.h +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "core/libraries/kernel/thread_management.h" - -namespace Core::Loader { -class SymbolsResolver; -} - -namespace Libraries::Kernel { -/**** - * rwlock calls - */ -int PS4_SYSV_ABI posix_pthread_rwlock_destroy(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI posix_pthread_rwlock_init(OrbisPthreadRwlock* rwlock, - const OrbisPthreadRwlockattr* attr, const char* name); -int PS4_SYSV_ABI posix_pthread_rwlock_rdlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI posix_pthread_rwlock_reltimedrdlock_np(); -int PS4_SYSV_ABI posix_pthread_rwlock_reltimedwrlock_np(); -int PS4_SYSV_ABI posix_pthread_rwlock_setname_np(); -int PS4_SYSV_ABI posix_pthread_rwlock_timedrdlock(); -int PS4_SYSV_ABI posix_pthread_rwlock_timedwrlock(); -int PS4_SYSV_ABI 
posix_pthread_rwlock_tryrdlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI posix_pthread_rwlock_trywrlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI posix_pthread_rwlock_unlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI posix_pthread_rwlock_wrlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI posix_pthread_rwlockattr_destroy(OrbisPthreadRwlockattr* attr); -int PS4_SYSV_ABI posix_pthread_rwlockattr_getpshared(); -int PS4_SYSV_ABI posix_pthread_rwlockattr_gettype_np(); -int PS4_SYSV_ABI posix_pthread_rwlockattr_init(OrbisPthreadRwlockattr* attr); -int PS4_SYSV_ABI posix_pthread_rwlockattr_setpshared(); -int PS4_SYSV_ABI posix_pthread_rwlockattr_settype_np(); -int PS4_SYSV_ABI scePthreadRwlockattrDestroy(OrbisPthreadRwlockattr* attr); -int PS4_SYSV_ABI scePthreadRwlockattrGetpshared(); -int PS4_SYSV_ABI scePthreadRwlockattrGettype(); -int PS4_SYSV_ABI scePthreadRwlockattrInit(OrbisPthreadRwlockattr* attr); -int PS4_SYSV_ABI scePthreadRwlockattrSetpshared(); -int PS4_SYSV_ABI scePthreadRwlockattrSettype(); -int PS4_SYSV_ABI scePthreadRwlockDestroy(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI scePthreadRwlockInit(OrbisPthreadRwlock* rwlock, - const OrbisPthreadRwlockattr* attr, const char* name); -int PS4_SYSV_ABI scePthreadRwlockRdlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI scePthreadRwlockTimedrdlock(); -int PS4_SYSV_ABI scePthreadRwlockTimedwrlock(); -int PS4_SYSV_ABI scePthreadRwlockTryrdlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI scePthreadRwlockTrywrlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI scePthreadRwlockUnlock(OrbisPthreadRwlock* rwlock); -int PS4_SYSV_ABI scePthreadRwlockWrlock(OrbisPthreadRwlock* rwlock); - -void ThreadsRwlockSymbolsRegister(Core::Loader::SymbolsResolver* sym); -} // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/threads/keys.cpp b/src/core/libraries/kernel/threads/keys.cpp new file mode 100644 index 00000000..78646833 --- /dev/null +++ 
b/src/core/libraries/kernel/threads/keys.cpp @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/libraries/error_codes.h" +#include "core/libraries/kernel/thread_management.h" +#include "core/libraries/libs.h" + +namespace Libraries::Kernel { + +int PS4_SYSV_ABI scePthreadKeyCreate(OrbisPthreadKey* key, PthreadKeyDestructor destructor) { + if (key == nullptr) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + int result = pthread_key_create(key, nullptr); + if (destructor) { + auto thread = scePthreadSelf(); + thread->key_destructors.emplace_back(*key, destructor); + } + + if (result != 0) { + LOG_ERROR(Kernel_Pthread, "scePthreadKeyCreate: error = {}", result); + result += ORBIS_KERNEL_ERROR_UNKNOWN; + } + return result; +} + +void* PS4_SYSV_ABI scePthreadGetspecific(OrbisPthreadKey key) { + return pthread_getspecific(key); +} + +int PS4_SYSV_ABI scePthreadSetspecific(OrbisPthreadKey key, /* const*/ void* value) { + int result = pthread_setspecific(key, value); + if (result != 0) { + LOG_ERROR(Kernel_Pthread, "scePthreadSetspecific: error = {}", result); + result += ORBIS_KERNEL_ERROR_UNKNOWN; + } + return result; +} + +void KeySymbolsRegister(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("geDaqgH9lTg", "libkernel", 1, "libkernel", 1, 1, scePthreadKeyCreate); + LIB_FUNCTION("eoht7mQOCmo", "libkernel", 1, "libkernel", 1, 1, scePthreadGetspecific); + LIB_FUNCTION("+BzXYkqYeLE", "libkernel", 1, "libkernel", 1, 1, scePthreadSetspecific); +} + +} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/kernel_threads_rwlock.cpp b/src/core/libraries/kernel/threads/rwlock.cpp similarity index 93% rename from src/core/libraries/kernel/threads/kernel_threads_rwlock.cpp rename to src/core/libraries/kernel/threads/rwlock.cpp index a092d712..87271fe2 100644 --- a/src/core/libraries/kernel/threads/kernel_threads_rwlock.cpp +++ 
b/src/core/libraries/kernel/threads/rwlock.cpp @@ -4,7 +4,7 @@ #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" -#include "kernel_threads.h" +#include "threads.h" namespace Libraries::Kernel { @@ -34,7 +34,23 @@ int PS4_SYSV_ABI posix_pthread_rwlock_init(OrbisPthreadRwlock* rwlock, return ORBIS_OK; } +OrbisPthreadRwlock* createRwlock(OrbisPthreadRwlock* rwlock) { + if (rwlock == nullptr || *rwlock != nullptr) { + return rwlock; + } + static std::mutex mutex; + std::scoped_lock lk{mutex}; + if (*rwlock != nullptr) { + return rwlock; + } + const VAddr addr = std::bit_cast(rwlock); + const auto name = fmt::format("rwlock{:#x}", addr); + posix_pthread_rwlock_init(rwlock, nullptr, name.c_str()); + return rwlock; +} + int PS4_SYSV_ABI posix_pthread_rwlock_rdlock(OrbisPthreadRwlock* rwlock) { + rwlock = createRwlock(rwlock); int result = pthread_rwlock_rdlock(&(*rwlock)->pth_rwlock); if (result != 0) { LOG_ERROR(Kernel_Pthread, "posix_pthread_rwlock_rdlock: error = {}", result); @@ -69,6 +85,10 @@ int PS4_SYSV_ABI posix_pthread_rwlock_timedwrlock() { } int PS4_SYSV_ABI posix_pthread_rwlock_tryrdlock(OrbisPthreadRwlock* rwlock) { + rwlock = createRwlock(rwlock); + if (rwlock == nullptr) { + return ORBIS_KERNEL_ERROR_EINVAL; + } int result = pthread_rwlock_tryrdlock(&(*rwlock)->pth_rwlock); if (result != 0) { LOG_ERROR(Kernel_Pthread, "posix_pthread_rwlock_tryrdlock: error = {}", result); @@ -77,6 +97,10 @@ int PS4_SYSV_ABI posix_pthread_rwlock_tryrdlock(OrbisPthreadRwlock* rwlock) { } int PS4_SYSV_ABI posix_pthread_rwlock_trywrlock(OrbisPthreadRwlock* rwlock) { + rwlock = createRwlock(rwlock); + if (rwlock == nullptr) { + return ORBIS_KERNEL_ERROR_EINVAL; + } int result = pthread_rwlock_trywrlock(&(*rwlock)->pth_rwlock); if (result != 0) { LOG_ERROR(Kernel_Pthread, "posix_pthread_rwlock_trywrlock: error = {}", result); @@ -85,6 +109,10 @@ int PS4_SYSV_ABI posix_pthread_rwlock_trywrlock(OrbisPthreadRwlock* rwlock) { } 
int PS4_SYSV_ABI posix_pthread_rwlock_unlock(OrbisPthreadRwlock* rwlock) { + rwlock = createRwlock(rwlock); + if (rwlock == nullptr) { + return ORBIS_KERNEL_ERROR_EINVAL; + } int result = pthread_rwlock_unlock(&(*rwlock)->pth_rwlock); if (result != 0) { LOG_ERROR(Kernel_Pthread, "posix_pthread_rwlock_unlock: error = {}", result); @@ -93,6 +121,10 @@ int PS4_SYSV_ABI posix_pthread_rwlock_unlock(OrbisPthreadRwlock* rwlock) { } int PS4_SYSV_ABI posix_pthread_rwlock_wrlock(OrbisPthreadRwlock* rwlock) { + rwlock = createRwlock(rwlock); + if (rwlock == nullptr) { + return ORBIS_KERNEL_ERROR_EINVAL; + } int result = pthread_rwlock_wrlock(&(*rwlock)->pth_rwlock); if (result != 0) { LOG_ERROR(Kernel_Pthread, "posix_pthread_rwlock_wrlock: error = {}", result); @@ -271,9 +303,7 @@ int PS4_SYSV_ABI scePthreadRwlockUnlock(OrbisPthreadRwlock* rwlock) { } int PS4_SYSV_ABI scePthreadRwlockWrlock(OrbisPthreadRwlock* rwlock) { - if (rwlock == nullptr || *rwlock == nullptr) { - return ORBIS_KERNEL_ERROR_EINVAL; - } + rwlock = createRwlock(rwlock); int result = pthread_rwlock_wrlock(&(*rwlock)->pth_rwlock); if (result != 0) { LOG_ERROR(Kernel_Pthread, "scePthreadRwlockWrlock: error = {}", result); @@ -282,7 +312,7 @@ int PS4_SYSV_ABI scePthreadRwlockWrlock(OrbisPthreadRwlock* rwlock) { return result; } -void ThreadsRwlockSymbolsRegister(Core::Loader::SymbolsResolver* sym) { +void RwlockSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("1471ajPzxh0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_rwlock_destroy); LIB_FUNCTION("ytQULN-nhL4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_rwlock_init); LIB_FUNCTION("iGjsr1WAtI0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_rwlock_rdlock); @@ -350,4 +380,4 @@ void ThreadsRwlockSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("+L98PIbGttk", "libkernel", 1, "libkernel", 1, 1, scePthreadRwlockUnlock); LIB_FUNCTION("mqdNorrB+gI", "libkernel", 1, "libkernel", 1, 1, scePthreadRwlockWrlock); } -} // 
namespace Libraries::Kernel \ No newline at end of file +} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp new file mode 100644 index 00000000..e6fc667d --- /dev/null +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/scope_exit.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" + +namespace Libraries::Kernel { + +using ListBaseHook = + boost::intrusive::list_base_hook>; + +class Semaphore { +public: + Semaphore(s32 init_count, s32 max_count, const char* name, bool is_fifo) + : name{name}, token_count{init_count}, max_count{max_count}, is_fifo{is_fifo} {} + + bool Wait(bool can_block, s32 need_count, u64* timeout) { + if (HasAvailableTokens(need_count)) { + return true; + } + if (!can_block) { + return false; + } + + // Create waiting thread object and add it into the list of waiters. + WaitingThread waiter{need_count, is_fifo}; + AddWaiter(waiter); + SCOPE_EXIT { + PopWaiter(waiter); + }; + + // Perform the wait. + return waiter.Wait(timeout); + } + + bool Signal(s32 signal_count) { + std::scoped_lock lk{mutex}; + if (token_count + signal_count > max_count) { + return false; + } + token_count += signal_count; + + // Wake up threads in order of priority. 
+ for (auto& waiter : wait_list) { + if (waiter.need_count > token_count) { + continue; + } + token_count -= waiter.need_count; + waiter.cv.notify_one(); + } + + return true; + } + +private: + struct WaitingThread : public ListBaseHook { + std::mutex mutex; + std::condition_variable cv; + u32 priority; + s32 need_count; + + explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} { + if (is_fifo) { + return; + } + // Retrieve calling thread priority for sorting into waiting threads list. + s32 policy; + sched_param param; + pthread_getschedparam(pthread_self(), &policy, &param); + priority = param.sched_priority; + } + + bool Wait(u64* timeout) { + std::unique_lock lk{mutex}; + if (!timeout) { + // Wait indefinitely until we are woken up. + cv.wait(lk); + return true; + } + // Wait until timeout runs out, recording how much remaining time there was. + const auto start = std::chrono::high_resolution_clock::now(); + const auto status = cv.wait_for(lk, std::chrono::microseconds(*timeout)); + const auto end = std::chrono::high_resolution_clock::now(); + const auto time = + std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); + *timeout -= time; + return status != std::cv_status::timeout; + } + + bool operator<(const WaitingThread& other) const { + return priority < other.priority; + } + }; + + void AddWaiter(WaitingThread& waiter) { + std::scoped_lock lk{mutex}; + // Insert at the end of the list for FIFO order. + if (is_fifo) { + wait_list.push_back(waiter); + return; + } + // Find the first with priority less than us and insert right before it.
+ auto it = wait_list.begin(); + while (it != wait_list.end() && it->priority > waiter.priority) { + it++; + } + wait_list.insert(it, waiter); + } + + void PopWaiter(WaitingThread& waiter) { + std::scoped_lock lk{mutex}; + wait_list.erase(WaitingThreads::s_iterator_to(waiter)); + } + + bool HasAvailableTokens(s32 need_count) { + std::scoped_lock lk{mutex}; + if (token_count >= need_count) { + token_count -= need_count; + return true; + } + return false; + } + + using WaitingThreads = + boost::intrusive::list, + boost::intrusive::constant_time_size>; + WaitingThreads wait_list; + std::string name; + std::atomic token_count; + std::mutex mutex; + s32 max_count; + bool is_fifo; +}; + +using OrbisKernelSema = Semaphore*; + +s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u32 attr, + s32 initCount, s32 maxCount, const void* pOptParam) { + if (!pName || attr > 2 || initCount < 0 || maxCount <= 0 || initCount > maxCount) { + LOG_ERROR(Lib_Kernel, "Semaphore creation parameters are invalid!"); + return ORBIS_KERNEL_ERROR_EINVAL; + } + *sem = new Semaphore(initCount, maxCount, pName, attr == 1); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u64* pTimeout) { + ASSERT(sem->Wait(true, needCount, pTimeout)); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { + if (!sem->Signal(signalCount)) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { + if (!sem->Wait(false, needCount, nullptr)) { + return ORBIS_KERNEL_ERROR_EBUSY; + } + return ORBIS_OK; +} + +void SemaphoreSymbolsRegister(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("188x57JYp0g", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateSema); + LIB_FUNCTION("Zxa0VhQVTsk", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitSema); + LIB_FUNCTION("4czppHBiriw", "libkernel", 1, "libkernel", 1, 1, 
sceKernelSignalSema); + LIB_FUNCTION("12wOHk8ywb0", "libkernel", 1, "libkernel", 1, 1, sceKernelPollSema); +} + +} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/threads.h b/src/core/libraries/kernel/threads/threads.h new file mode 100644 index 00000000..a3fd354b --- /dev/null +++ b/src/core/libraries/kernel/threads/threads.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "core/libraries/kernel/thread_management.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +namespace Libraries::Kernel { + +int PS4_SYSV_ABI scePthreadRwlockattrInit(OrbisPthreadRwlockattr* attr); + +void SemaphoreSymbolsRegister(Core::Loader::SymbolsResolver* sym); +void RwlockSymbolsRegister(Core::Loader::SymbolsResolver* sym); +void KeySymbolsRegister(Core::Loader::SymbolsResolver* sym); + +} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/time_management.cpp b/src/core/libraries/kernel/time_management.cpp index c32e8be8..63f27e78 100644 --- a/src/core/libraries/kernel/time_management.cpp +++ b/src/core/libraries/kernel/time_management.cpp @@ -153,6 +153,26 @@ int PS4_SYSV_ABI gettimeofday(OrbisKernelTimeval* tp, OrbisKernelTimezone* tz) { return sceKernelGettimeofday(tp); } +s32 PS4_SYSV_ABI sceKernelGettimezone(OrbisKernelTimezone* tz) { +#ifdef _WIN64 + ASSERT(tz); + static int tzflag = 0; + if (!tzflag) { + _tzset(); + tzflag++; + } + tz->tz_minuteswest = _timezone / 60; + tz->tz_dsttime = _daylight; +#else + struct timezone tzz; + struct timeval tv; + gettimeofday(&tv, &tzz); + tz->tz_dsttime = tzz.tz_dsttime; + tz->tz_minuteswest = tzz.tz_minuteswest; +#endif + return ORBIS_OK; +} + int PS4_SYSV_ABI posix_clock_getres(u32 clock_id, OrbisKernelTimespec* res) { if (res == nullptr) { return SCE_KERNEL_ERROR_EFAULT; @@ -198,12 +218,14 @@ void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) { 
LIB_FUNCTION("n88vx3C5nW8", "libScePosix", 1, "libkernel", 1, 1, gettimeofday); LIB_FUNCTION("QvsZxomvUHs", "libkernel", 1, "libkernel", 1, 1, sceKernelNanosleep); LIB_FUNCTION("1jfXLRVzisc", "libkernel", 1, "libkernel", 1, 1, sceKernelUsleep); + LIB_FUNCTION("QcteRwbsnV0", "libkernel", 1, "libkernel", 1, 1, posix_usleep); LIB_FUNCTION("QcteRwbsnV0", "libScePosix", 1, "libkernel", 1, 1, posix_usleep); LIB_FUNCTION("-ZR+hG7aDHw", "libkernel", 1, "libkernel", 1, 1, sceKernelSleep); LIB_FUNCTION("0wu33hunNdE", "libScePosix", 1, "libkernel", 1, 1, sceKernelSleep); LIB_FUNCTION("yS8U2TGCe1A", "libkernel", 1, "libkernel", 1, 1, posix_nanosleep); LIB_FUNCTION("yS8U2TGCe1A", "libScePosix", 1, "libkernel", 1, 1, posix_nanosleep); LIB_FUNCTION("QBi7HCK03hw", "libkernel", 1, "libkernel", 1, 1, sceKernelClockGettime); + LIB_FUNCTION("kOcnerypnQA", "libkernel", 1, "libkernel", 1, 1, sceKernelGettimezone); LIB_FUNCTION("lLMT9vJAck0", "libkernel", 1, "libkernel", 1, 1, posix_clock_gettime); LIB_FUNCTION("lLMT9vJAck0", "libScePosix", 1, "libkernel", 1, 1, posix_clock_gettime); LIB_FUNCTION("smIj7eqzZE8", "libScePosix", 1, "libkernel", 1, 1, posix_clock_getres); diff --git a/src/core/libraries/libc/libc.cpp b/src/core/libraries/libc/libc.cpp index d709515e..dafb16be 100644 --- a/src/core/libraries/libc/libc.cpp +++ b/src/core/libraries/libc/libc.cpp @@ -463,10 +463,10 @@ void libcSymbolsRegister(Core::Loader::SymbolsResolver* sym) { // stdio functions LIB_FUNCTION("xeYO4u7uyJ0", "libc", 1, "libc", 1, 1, ps4_fopen); - LIB_FUNCTION("hcuQgD53UxM", "libc", 1, "libc", 1, 1, ps4_printf); + // LIB_FUNCTION("hcuQgD53UxM", "libc", 1, "libc", 1, 1, ps4_printf); LIB_FUNCTION("Q2V+iqvjgC0", "libc", 1, "libc", 1, 1, ps4_vsnprintf); LIB_FUNCTION("YQ0navp+YIc", "libc", 1, "libc", 1, 1, ps4_puts); - LIB_FUNCTION("fffwELXNVFA", "libc", 1, "libc", 1, 1, ps4_fprintf); + // LIB_FUNCTION("fffwELXNVFA", "libc", 1, "libc", 1, 1, ps4_fprintf); LIB_FUNCTION("QMFyLoqNxIg", "libc", 1, "libc", 1, 1, 
ps4_setvbuf); LIB_FUNCTION("uodLYyUip20", "libc", 1, "libc", 1, 1, ps4_fclose); LIB_FUNCTION("rQFVBXp-Cxg", "libc", 1, "libc", 1, 1, ps4_fseek); diff --git a/src/core/libraries/libc_internal/libc_internal.cpp b/src/core/libraries/libc_internal/libc_internal.cpp index 93c98075..0607e93b 100644 --- a/src/core/libraries/libc_internal/libc_internal.cpp +++ b/src/core/libraries/libc_internal/libc_internal.cpp @@ -39,10 +39,26 @@ int PS4_SYSV_ABI internal_memcmp(const void* s1, const void* s2, size_t n) { return std::memcmp(s1, s2, n); } +int PS4_SYSV_ABI internal_strncmp(const char* str1, const char* str2, size_t num) { + return std::strncmp(str1, str2, num); +} + +int PS4_SYSV_ABI internal_strlen(const char* str) { + return std::strlen(str); +} + float PS4_SYSV_ABI internal_expf(float x) { return expf(x); } +void* PS4_SYSV_ABI internal_malloc(size_t size) { + return std::malloc(size); +} + +char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t count) { + return std::strncpy(dest, src, count); +} + void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("NFLs+dRJGNg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_memcpy_s); @@ -55,6 +71,14 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("DfivPArhucg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_memcmp); LIB_FUNCTION("8zsu04XNsZ4", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_expf); + LIB_FUNCTION("aesyjrHVWy4", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strncmp); + LIB_FUNCTION("j4ViWNHEgww", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strlen); + LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strncpy); + LIB_FUNCTION("gQX+4GDQjpM", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_malloc); }; -} // namespace Libraries::LibcInternal \ No newline at end of file +} // namespace 
Libraries::LibcInternal diff --git a/src/core/libraries/libs.h b/src/core/libraries/libs.h index fe36fdf6..72eca312 100644 --- a/src/core/libraries/libs.h +++ b/src/core/libraries/libs.h @@ -3,9 +3,48 @@ #pragma once +#include +#include "common/logging/log.h" #include "core/loader/elf.h" #include "core/loader/symbols_resolver.h" +template +struct StringLiteral { + constexpr StringLiteral(const char (&str)[N]) { + std::copy_n(str, N, value); + } + + char value[N]; +}; + +template +struct wrapper_impl; + +template +struct wrapper_impl { + static R PS4_SYSV_ABI wrap(Args... args) { + if (std::string_view(name.value) != "scePthreadEqual" && + std::string_view(name.value) != "sceUserServiceGetEvent") { + LOG_WARNING(Core_Linker, "Function {} called", name.value); + } + if constexpr (std::is_same_v || std::is_same_v) { + const u32 ret = f(args...); + if (ret != 0 && std::string_view(name.value) != "scePthreadEqual") { + LOG_WARNING(Core_Linker, "Function {} returned {:#x}", name.value, ret); + } + return ret; + } + // stuff + return f(args...); + } +}; + +template +constexpr auto wrapper = wrapper_impl::wrap; + +// #define W(foo) wrapper<#foo, decltype(&foo), foo> +#define W(foo) foo + #define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \ { \ Core::Loader::SymbolResolver sr{}; \ @@ -16,7 +55,7 @@ sr.module_version_major = moduleVersionMajor; \ sr.module_version_minor = moduleVersionMinor; \ sr.type = Core::Loader::SymbolType::Function; \ - auto func = reinterpret_cast(function); \ + auto func = reinterpret_cast(W(function)); \ sym->AddSymbol(sr, func); \ } diff --git a/src/core/libraries/rtc/rtc.cpp b/src/core/libraries/rtc/rtc.cpp index 6bf6a91b..82e6db67 100644 --- a/src/core/libraries/rtc/rtc.cpp +++ b/src/core/libraries/rtc/rtc.cpp @@ -300,4 +300,4 @@ void RegisterlibSceRtc(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("-5y2uJ62qS8", "libSceRtc", 1, "libSceRtc", 1, 1, sceRtcTickAddYears); }; -} // namespace 
Libraries::Rtc \ No newline at end of file +} // namespace Libraries::Rtc diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index e5db45b6..a2086af2 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -733,4 +733,4 @@ void RegisterlibSceSaveData(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("AuTE0gFxZCI", "libSceSaveData", 1, "libSceSaveData", 1, 1, Func_02E4C4D201716422); }; -} // namespace Libraries::SaveData \ No newline at end of file +} // namespace Libraries::SaveData diff --git a/src/core/libraries/system/userservice.cpp b/src/core/libraries/system/userservice.cpp index 16e5295c..8c48b311 100644 --- a/src/core/libraries/system/userservice.cpp +++ b/src/core/libraries/system/userservice.cpp @@ -104,7 +104,7 @@ int PS4_SYSV_ABI sceUserServiceGetDiscPlayerFlag() { } s32 PS4_SYSV_ABI sceUserServiceGetEvent(OrbisUserServiceEvent* event) { - LOG_INFO(Lib_UserService, "(DUMMY) called"); + LOG_TRACE(Lib_UserService, "(DUMMY) called"); // fake a loggin event static bool logged_in = false; diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index b6e1ff73..e86fb1ed 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -201,18 +201,25 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { } // Reset flip label - req.port->buffer_labels[req.index] = 0; + if (req.index != -1) { + req.port->buffer_labels[req.index] = 0; + } } bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop /*= false*/) { - const auto& buffer = port->buffer_slots[index]; - const auto& group = port->groups[buffer.group_index]; - auto* frame = renderer->PrepareFrame(group, buffer.address_left); + Vulkan::Frame* frame; + if (index == -1) { + frame = renderer->PrepareBlankFrame(); + } else { + const auto& buffer = port->buffer_slots[index]; + const auto& group = 
port->groups[buffer.group_index]; + frame = renderer->PrepareFrame(group, buffer.address_left); + } std::scoped_lock lock{mutex}; - if (requests.size() >= port->NumRegisteredBuffers()) { + if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) { LOG_ERROR(Lib_VideoOut, "Flip queue is full"); return false; } diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index bb19c586..90ac7260 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -124,14 +124,12 @@ s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode LOG_WARNING(Lib_VideoOut, "flipmode = {}", flipMode); } - ASSERT_MSG(bufferIndex != -1, "Blank output not supported"); - if (bufferIndex < -1 || bufferIndex > 15) { LOG_ERROR(Lib_VideoOut, "Invalid bufferIndex = {}", bufferIndex); return ORBIS_VIDEO_OUT_ERROR_INVALID_INDEX; } - if (port->buffer_slots[bufferIndex].group_index < 0) { + if (bufferIndex != -1 && port->buffer_slots[bufferIndex].group_index < 0) { LOG_ERROR(Lib_VideoOut, "Slot in bufferIndex = {} is not registered", bufferIndex); return ORBIS_VIDEO_OUT_ERROR_INVALID_INDEX; } @@ -196,7 +194,6 @@ s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 i LOG_INFO(Lib_VideoOut, "called"); ASSERT(userId == UserService::ORBIS_USER_SERVICE_USER_ID_SYSTEM || userId == 0); ASSERT(busType == SCE_VIDEO_OUT_BUS_TYPE_MAIN); - ASSERT(param == nullptr); if (index != 0) { LOG_ERROR(Lib_VideoOut, "Index != 0"); @@ -259,6 +256,12 @@ s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** u return ORBIS_OK; } +s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo( + s32 handle, SceVideoOutDeviceCapabilityInfo* pDeviceCapabilityInfo) { + pDeviceCapabilityInfo->capability = 0; + return ORBIS_OK; +} + void RegisterLib(Core::Loader::SymbolsResolver* sym) { driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); @@ -299,6 
+302,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("U46NwOiJpys", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutSubmitFlip); LIB_FUNCTION("SbU3dwp80lQ", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutGetFlipStatus); + LIB_FUNCTION("kGVLc3htQE8", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, + sceVideoOutGetDeviceCapabilityInfo); } } // namespace Libraries::VideoOut diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index b36520a2..52426ecc 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -84,6 +84,10 @@ struct SceVideoOutVblankStatus { u8 pad1[7] = {}; }; +struct SceVideoOutDeviceCapabilityInfo { + u64 capability; +}; + void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, PixelFormat pixelFormat, u32 tilingMode, u32 aspectRatio, u32 width, u32 height, u32 pitchInPixel); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 7867b180..2046af93 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -57,9 +57,6 @@ void Linker::Execute() { // Calculate static TLS size. 
for (const auto& module : m_modules) { - if (module->tls.image_size != 0) { - module->tls.modid = ++max_tls_index; - } static_tls_size += module->tls.image_size; module->tls.offset = static_tls_size; } @@ -101,7 +98,7 @@ s32 Linker::LoadModule(const std::filesystem::path& elf_name) { return -1; } - auto module = std::make_unique(elf_name); + auto module = std::make_unique(elf_name, max_tls_index); if (!module->IsValid()) { LOG_ERROR(Core_Linker, "Provided file {} is not valid ELF file", elf_name.string()); return -1; @@ -111,8 +108,24 @@ s32 Linker::LoadModule(const std::filesystem::path& elf_name) { return m_modules.size() - 1; } +Module* Linker::FindByAddress(VAddr address) { + for (auto& module : m_modules) { + const VAddr base = module->GetBaseAddress(); + if (address >= base && address < base + module->aligned_base_size) { + return module.get(); + } + } + return nullptr; +} + void Linker::Relocate(Module* module) { - module->ForEachRelocation([&](elf_relocation* rel, bool isJmpRel) { + module->ForEachRelocation([&](elf_relocation* rel, u32 i, bool isJmpRel) { + const u32 bit_idx = + (isJmpRel ? module->dynamic_info.relocation_table_size / sizeof(elf_relocation) : 0) + + i; + if (module->TestRelaBit(bit_idx)) { + return; + } auto type = rel->GetType(); auto symbol = rel->GetSymbol(); auto addend = rel->rel_addend; @@ -167,11 +180,15 @@ void Linker::Relocate(Module* module) { switch (sym_bind) { case STB_LOCAL: symbol_vitrual_addr = rel_base_virtual_addr + sym.st_value; + module->SetRelaBit(bit_idx); break; case STB_GLOBAL: case STB_WEAK: { rel_name = namesTlb + sym.st_name; - Resolve(rel_name, rel_sym_type, module, &symrec); + if (Resolve(rel_name, rel_sym_type, module, &symrec)) { + // Only set the rela bit if the symbol was actually resolved and not stubbed. 
+ module->SetRelaBit(bit_idx); + } symbol_vitrual_addr = symrec.virtual_address; break; } @@ -203,14 +220,14 @@ const Module* Linker::FindExportedModule(const ModuleInfo& module, const Library return it == m_modules.end() ? nullptr : it->get(); } -void Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Module* m, +bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Module* m, Loader::SymbolRecord* return_info) { const auto ids = Common::SplitString(name, '#'); if (ids.size() != 3) { return_info->virtual_address = 0; return_info->name = name; LOG_ERROR(Core_Linker, "Not Resolved {}", name); - return; + return false; } const LibraryInfo* library = m->FindLibrary(ids[1]); @@ -236,7 +253,7 @@ void Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul } if (record) { *return_info = *record; - return; + return true; } const auto aeronid = AeroLib::FindByNid(sr.name.c_str()); @@ -249,18 +266,42 @@ void Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul } LOG_ERROR(Core_Linker, "Linker: Stub resolved {} as {} (lib: {}, mod: {})", sr.name, return_info->name, library->name, module->name); + return false; } void* Linker::TlsGetAddr(u64 module_index, u64 offset) { std::scoped_lock lk{mutex}; DtvEntry* dtv_table = GetTcbBase()->tcb_dtv; - ASSERT_MSG(dtv_table[0].counter == dtv_generation_counter, - "Reallocation of DTV table is not supported"); + if (dtv_table[0].counter != dtv_generation_counter) { + // Generation counter changed, a dynamic module was either loaded or unloaded. + const u32 old_num_dtvs = dtv_table[1].counter; + ASSERT_MSG(max_tls_index > old_num_dtvs, "Module unloading unsupported"); + // Module was loaded, increase DTV table size. 
+ DtvEntry* new_dtv_table = new DtvEntry[max_tls_index + 2]; + std::memcpy(new_dtv_table + 2, dtv_table + 2, old_num_dtvs * sizeof(DtvEntry)); + new_dtv_table[0].counter = dtv_generation_counter; + new_dtv_table[1].counter = max_tls_index; + delete[] dtv_table; - void* module = (u8*)dtv_table[module_index + 1].pointer + offset; - ASSERT_MSG(module, "DTV allocation is not supported"); - return module; + // Update TCB pointer. + GetTcbBase()->tcb_dtv = new_dtv_table; + dtv_table = new_dtv_table; + } + + u8* addr = dtv_table[module_index + 1].pointer; + if (!addr) { + // Module was just loaded by above code. Allocate TLS block for it. + Module* module = m_modules[module_index - 1].get(); + const u32 init_image_size = module->tls.init_image_size; + u8* dest = reinterpret_cast(heap_api_func(module->tls.image_size)); + const u8* src = reinterpret_cast(module->tls.image_virtual_addr); + std::memcpy(dest, src, init_image_size); + std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size); + dtv_table[module_index + 1].pointer = dest; + addr = dest; + } + return addr + offset; } void Linker::InitTlsForThread(bool is_primary) { diff --git a/src/core/linker.h b/src/core/linker.h index f5414ec6..bc12de7d 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include "core/module.h" @@ -37,17 +38,32 @@ public: return m_modules.at(index).get(); } + void RelocateAnyImports(Module* m) { + Relocate(m); + for (auto& module : m_modules) { + const auto imports = module->GetImportModules(); + if (std::ranges::contains(imports, m->name, &ModuleInfo::name)) { + Relocate(module.get()); + } + } + } + void SetHeapApiFunc(void* func) { heap_api_func = *reinterpret_cast(func); } + void AdvanceGenerationCounter() noexcept { + dtv_generation_counter++; + } + void* TlsGetAddr(u64 module_index, u64 offset); void InitTlsForThread(bool is_primary = false); s32 LoadModule(const std::filesystem::path& elf_name); + Module* 
FindByAddress(VAddr address); void Relocate(Module* module); - void Resolve(const std::string& name, Loader::SymbolType type, Module* module, + bool Resolve(const std::string& name, Loader::SymbolType type, Module* module, Loader::SymbolRecord* return_info); void Execute(); void DebugDump(); @@ -58,7 +74,7 @@ private: std::mutex mutex; u32 dtv_generation_counter{1}; size_t static_tls_size{}; - size_t max_tls_index{}; + u32 max_tls_index{}; HeapApiFunc heap_api_func{}; std::vector> m_modules; Loader::SymbolsResolver m_hle_symbols{}; diff --git a/src/core/loader/dwarf.cpp b/src/core/loader/dwarf.cpp new file mode 100644 index 00000000..1e054cb8 --- /dev/null +++ b/src/core/loader/dwarf.cpp @@ -0,0 +1,137 @@ +// SPDX-FileCopyrightText: Copyright (C) 2001-2024 Free Software Foundation, Inc. +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/loader/dwarf.h" + +namespace Dwarf { + +template +T get(uintptr_t addr) { + T val; + memcpy(&val, reinterpret_cast(addr), sizeof(T)); + return val; +} + +static uintptr_t getEncodedP(uintptr_t& addr, uintptr_t end, u8 encoding, uintptr_t datarelBase) { + const uintptr_t startAddr = addr; + const u8* p = (u8*)addr; + uintptr_t result; + + // First get value + switch (encoding & 0x0F) { + case DW_EH_PE_ptr: + result = get(addr); + p += sizeof(uintptr_t); + addr = (uintptr_t)p; + break; + case DW_EH_PE_udata2: + result = get(addr); + p += sizeof(u16); + addr = (uintptr_t)p; + break; + case DW_EH_PE_udata4: + result = get(addr); + p += sizeof(u32); + addr = (uintptr_t)p; + break; + case DW_EH_PE_udata8: + result = get(addr); + p += sizeof(u64); + addr = (uintptr_t)p; + break; + case DW_EH_PE_sdata2: + // Sign extend from signed 16-bit value. + result = get(addr); + p += sizeof(s16); + addr = (uintptr_t)p; + break; + case DW_EH_PE_sdata4: + // Sign extend from signed 32-bit value. 
+ result = get(addr); + p += sizeof(s32); + addr = (uintptr_t)p; + break; + case DW_EH_PE_sdata8: + result = get(addr); + p += sizeof(s64); + addr = (uintptr_t)p; + break; + default: + UNREACHABLE_MSG("unknown pointer encoding"); + } + + // Then add relative offset + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + // do nothing + break; + case DW_EH_PE_pcrel: + result += startAddr; + break; + case DW_EH_PE_textrel: + UNREACHABLE_MSG("DW_EH_PE_textrel pointer encoding not supported"); + break; + case DW_EH_PE_datarel: + // DW_EH_PE_datarel is only valid in a few places, so the parameter has a + // default value of 0, and we abort in the event that someone calls this + // function with a datarelBase of 0 and DW_EH_PE_datarel encoding. + if (datarelBase == 0) + UNREACHABLE_MSG("DW_EH_PE_datarel is invalid with a datarelBase of 0"); + result += datarelBase; + break; + case DW_EH_PE_funcrel: + UNREACHABLE_MSG("DW_EH_PE_funcrel pointer encoding not supported"); + break; + case DW_EH_PE_aligned: + UNREACHABLE_MSG("DW_EH_PE_aligned pointer encoding not supported"); + break; + default: + UNREACHABLE_MSG("unknown pointer encoding"); + break; + } + + if (encoding & DW_EH_PE_indirect) { + result = get(result); + } + + return result; +} + +bool DecodeEHHdr(uintptr_t ehHdrStart, uintptr_t ehHdrEnd, EHHeaderInfo& ehHdrInfo) { + auto p = ehHdrStart; + // Ensure that we don't read data beyond the end of .eh_frame_hdr + if (ehHdrEnd - ehHdrStart < 4) { + // Don't print a message for an empty .eh_frame_hdr (this can happen if + // the linker script defines symbols for it even in the empty case). 
+ if (ehHdrEnd == ehHdrStart) { + return false; + } + LOG_ERROR(Core_Linker, + "Unsupported .eh_frame_hdr at {:#x} " + "need at least 4 bytes of data but only got {:#x}", + ehHdrStart, ehHdrEnd - ehHdrStart); + return false; + } + + const u8 version = get(p++); + if (version != 1) { + LOG_CRITICAL(Core_Linker, "Unsupported .eh_frame_hdr version: {:#x} at {:#x}", version, + ehHdrStart); + return false; + } + + const u8 eh_frame_ptr_enc = get(p++); + const u8 fde_count_enc = get(p++); + ehHdrInfo.table_enc = get(p++); + + ehHdrInfo.eh_frame_ptr = getEncodedP(p, ehHdrEnd, eh_frame_ptr_enc, ehHdrStart); + ehHdrInfo.fde_count = + fde_count_enc == DW_EH_PE_omit ? 0 : getEncodedP(p, ehHdrEnd, fde_count_enc, ehHdrStart); + ehHdrInfo.table = p; + + return true; +} + +} // namespace Dwarf diff --git a/src/core/loader/dwarf.h b/src/core/loader/dwarf.h new file mode 100644 index 00000000..876b60fd --- /dev/null +++ b/src/core/loader/dwarf.h @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: Copyright (C) 2001-2024 Free Software Foundation, Inc. +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Dwarf { + +enum { + DW_EH_PE_ptr = 0x00, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_signed = 0x08, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_absptr = 0x00, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + DW_EH_PE_indirect = 0x80, + DW_EH_PE_omit = 0xFF +}; + +/// Information encoded in the EH frame header. 
+struct EHHeaderInfo { + uintptr_t eh_frame_ptr; + size_t fde_count; + uintptr_t table; + u8 table_enc; +}; + +bool DecodeEHHdr(uintptr_t ehHdrStart, uintptr_t ehHdrEnd, EHHeaderInfo& ehHdrInfo); + +} // namespace Dwarf diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index 026102b9..8e718837 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h @@ -449,6 +449,15 @@ constexpr u32 R_X86_64_JUMP_SLOT = 7; // Create PLT entry constexpr u32 R_X86_64_RELATIVE = 8; // Adjust by program base constexpr u32 R_X86_64_DTPMOD64 = 16; +struct eh_frame_hdr { + uint8_t version; + uint8_t eh_frame_ptr_enc; + uint8_t fde_count_enc; + uint8_t table_enc; + uint32_t eh_frame_ptr; + uint32_t fde_count; +}; + namespace Core::Loader { class Elf { diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 5029f82c..21261d2d 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -84,10 +84,14 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M MemoryMapFlags flags, VMAType type, std::string_view name, bool is_exec, PAddr phys_addr, u64 alignment) { std::scoped_lock lk{mutex}; + if (type == VMAType::Flexible && total_flexible_usage + size > 448_MB) { + return SCE_KERNEL_ERROR_ENOMEM; + } // When virtual addr is zero, force it to virtual_base. The guest cannot pass Fixed // flag so we will take the branch that searches for free (or reserved) mappings. virtual_addr = (virtual_addr == 0) ? impl.VirtualBase() : virtual_addr; + alignment = alignment > 0 ? alignment : 16_KB; VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr; SCOPE_EXIT { @@ -101,6 +105,9 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.phys_base = phys_addr; MapVulkanMemory(mapped_addr, size); } + if (type == VMAType::Flexible) { + total_flexible_usage += size; + } }; // Fixed mapping means the virtual address must exactly match the provided one. 
@@ -114,12 +121,18 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { auto it = FindVMA(mapped_addr); - while (it->second.type != VMAType::Free || it->second.size < size) { - it++; + // If the VMA is free and contains the requested mapping we are done. + if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) { + mapped_addr = virtual_addr; + } else { + // Search for the first free VMA that fits our mapping. + while (it->second.type != VMAType::Free || it->second.size < size) { + it++; + } + ASSERT(it != vma_map.end()); + const auto& vma = it->second; + mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base; } - ASSERT(it != vma_map.end()); - const VAddr base = it->second.base; - mapped_addr = alignment > 0 ? Common::AlignUp(base, alignment) : base; } // Perform the mapping. @@ -128,6 +141,35 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M return ORBIS_OK; } +int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, + MemoryMapFlags flags, uintptr_t fd, size_t offset) { + ASSERT(virtual_addr == 0); + virtual_addr = impl.VirtualBase(); + const size_t size_aligned = Common::AlignUp(size, 16_KB); + + // Find first free area to map the file. + auto it = FindVMA(virtual_addr); + while (it->second.type != VMAType::Free || it->second.size < size_aligned) { + it++; + } + ASSERT(it != vma_map.end()); + + // Map the file. 
+ const VAddr mapped_addr = it->second.base; + impl.MapFile(mapped_addr, size, offset, fd); + + // Add virtual memory area + auto& new_vma = AddMapping(mapped_addr, size_aligned); + new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); + new_vma.prot = prot; + new_vma.name = "File"; + new_vma.fd = fd; + new_vma.type = VMAType::File; + + *out_addr = std::bit_cast(mapped_addr); + return ORBIS_OK; +} + void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { std::scoped_lock lk{mutex}; @@ -137,10 +179,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { "Attempting to unmap partially mapped range"); const auto type = it->second.type; - const PAddr phys_addr = type == VMAType::Direct ? it->second.phys_base : -1; + const bool has_backing = type == VMAType::Direct || type == VMAType::File; if (type == VMAType::Direct) { UnmapVulkanMemory(virtual_addr, size); } + if (type == VMAType::Flexible) { + total_flexible_usage -= size; + } // Mark region as free and attempt to coalesce it with neighbours. auto& vma = it->second; @@ -150,7 +195,7 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { MergeAdjacent(vma_map, it); // Unmap the memory region. 
- impl.Unmap(virtual_addr, size, phys_addr); + impl.Unmap(virtual_addr, size, has_backing); TRACK_FREE(virtual_addr, "VMEM"); } @@ -200,7 +245,7 @@ int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next, std::scoped_lock lk{mutex}; auto dmem_area = FindDmemArea(addr); - if (dmem_area->second.is_free && find_next) { + while (dmem_area != dmem_map.end() && dmem_area->second.is_free && find_next) { dmem_area++; } diff --git a/src/core/memory.h b/src/core/memory.h index c5d130c0..d7ec6fc6 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -36,8 +36,12 @@ enum class MemoryProt : u32 { enum class MemoryMapFlags : u32 { NoFlags = 0, + Shared = 1, + Private = 2, Fixed = 0x10, NoOverwrite = 0x0080, + NoSync = 0x800, + NoCore = 0x20000, NoCoalesce = 0x400000, }; DECLARE_ENUM_FLAG_OPERATORS(MemoryMapFlags) @@ -50,6 +54,7 @@ enum class VMAType : u32 { Pooled = 4, Stack = 5, Code = 6, + File = 7, }; struct DirectMemoryArea { @@ -81,6 +86,11 @@ struct VirtualMemoryArea { MemoryProt prot = MemoryProt::NoAccess; bool disallow_merge = false; std::string name = ""; + uintptr_t fd = 0; + + bool Contains(VAddr addr, size_t size) const { + return addr >= base && (addr + size) < (base + this->size); + } bool CanMergeWith(const VirtualMemoryArea& next) const { if (disallow_merge || next.disallow_merge) { @@ -123,6 +133,9 @@ public: MemoryMapFlags flags, VMAType type, std::string_view name = "", bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0); + int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, + MemoryMapFlags flags, uintptr_t fd, size_t offset); + void UnmapMemory(VAddr virtual_addr, size_t size); int QueryProtection(VAddr addr, void** start, void** end, u32* prot); @@ -182,6 +195,7 @@ private: DMemMap dmem_map; VMAMap vma_map; std::recursive_mutex mutex; + size_t total_flexible_usage{}; struct MappedMemory { vk::UniqueBuffer buffer; diff --git a/src/core/module.cpp b/src/core/module.cpp index 661e4cbd..ffb443b3 100644 --- 
a/src/core/module.cpp +++ b/src/core/module.cpp @@ -7,6 +7,7 @@ #include "common/logging/log.h" #include "common/string_util.h" #include "core/aerolib/aerolib.h" +#include "core/loader/dwarf.h" #include "core/memory.h" #include "core/module.h" #include "core/tls.h" @@ -54,10 +55,11 @@ static std::string EncodeId(u64 nVal) { return enc; } -Module::Module(const std::filesystem::path& file_) : file{file_} { +Module::Module(const std::filesystem::path& file_, u32& max_tls_index) + : file{file_}, name{file.stem().string()} { elf.Open(file); if (elf.IsElfFile()) { - LoadModuleToMemory(); + LoadModuleToMemory(max_tls_index); LoadDynamicInfo(); LoadSymbols(); } @@ -65,13 +67,13 @@ Module::Module(const std::filesystem::path& file_) : file{file_} { Module::~Module() = default; -void Module::Start(size_t args, const void* argp, void* param) { - LOG_INFO(Core_Linker, "Module started : {}", file.filename().string()); +s32 Module::Start(size_t args, const void* argp, void* param) { + LOG_INFO(Core_Linker, "Module started : {}", name); const VAddr addr = dynamic_info.init_virtual_addr + GetBaseAddress(); - reinterpret_cast(addr)(args, argp, param); + return reinterpret_cast(addr)(args, argp, param); } -void Module::LoadModuleToMemory() { +void Module::LoadModuleToMemory(u32& max_tls_index) { static constexpr size_t BlockAlign = 0x1000; static constexpr u64 TrampolineSize = 8_MB; @@ -84,7 +86,6 @@ void Module::LoadModuleToMemory() { // Map module segments (and possible TLS trampolines) auto* memory = Core::Memory::Instance(); void** out_addr = reinterpret_cast(&base_virtual_addr); - const auto name = file.filename().string(); memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true); LoadAddress += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); @@ -98,6 +99,17 @@ void Module::LoadModuleToMemory() { LOG_INFO(Core_Linker, "base_size ..............: {:#018x}", base_size); 
LOG_INFO(Core_Linker, "aligned_base_size ......: {:#018x}", aligned_base_size); + const auto add_segment = [this](const elf_program_header& phdr, bool do_map = true) { + const VAddr segment_addr = base_virtual_addr + phdr.p_vaddr; + if (do_map) { + elf.LoadSegment(segment_addr, phdr.p_offset, phdr.p_filesz); + } + auto& segment = info.segments[info.num_segments++]; + segment.address = segment_addr; + segment.prot = phdr.p_flags; + segment.size = GetAlignedSize(phdr); + }; + for (u16 i = 0; i < elf_header.e_phnum; i++) { const auto header_type = elf.ElfPheaderTypeStr(elf_pheader[i].p_type); switch (elf_pheader[i].p_type) { @@ -118,13 +130,14 @@ void Module::LoadModuleToMemory() { LOG_INFO(Core_Linker, "segment_memory_size ...: {}", segment_memory_size); LOG_INFO(Core_Linker, "segment_mode ..........: {}", segment_mode); - elf.LoadSegment(segment_addr, elf_pheader[i].p_offset, segment_file_size); + add_segment(elf_pheader[i]); if (elf_pheader[i].p_flags & PF_EXEC) { - PatchTLS(segment_addr, segment_memory_size, c); + PatchTLS(segment_addr, segment_file_size, c); } break; } case PT_DYNAMIC: + add_segment(elf_pheader[i], false); if (elf_pheader[i].p_filesz != 0) { m_dynamic.resize(elf_pheader[i].p_filesz); const VAddr segment_addr = std::bit_cast(m_dynamic.data()); @@ -147,12 +160,31 @@ void Module::LoadModuleToMemory() { tls.align = elf_pheader[i].p_align; tls.image_virtual_addr = elf_pheader[i].p_vaddr + base_virtual_addr; tls.image_size = GetAlignedSize(elf_pheader[i]); + if (tls.image_size != 0) { + tls.modid = ++max_tls_index; + } LOG_INFO(Core_Linker, "TLS virtual address = {:#x}", tls.image_virtual_addr); LOG_INFO(Core_Linker, "TLS image size = {}", tls.image_size); break; case PT_SCE_PROCPARAM: proc_param_virtual_addr = elf_pheader[i].p_vaddr + base_virtual_addr; break; + case PT_GNU_EH_FRAME: { + eh_frame_hdr_addr = elf_pheader[i].p_vaddr; + eh_frame_hdr_size = elf_pheader[i].p_memsz; + const VAddr eh_hdr_start = base_virtual_addr + eh_frame_hdr_addr; + const 
VAddr eh_hdr_end = eh_hdr_start + eh_frame_hdr_size; + Dwarf::EHHeaderInfo hdr_info; + if (Dwarf::DecodeEHHdr(eh_hdr_start, eh_hdr_end, hdr_info)) { + eh_frame_addr = hdr_info.eh_frame_ptr - base_virtual_addr; + if (eh_frame_hdr_addr > eh_frame_addr) { + eh_frame_size = (eh_frame_hdr_addr - eh_frame_addr); + } else { + eh_frame_size = (aligned_base_size - eh_frame_hdr_addr); + } + } + break; + } default: LOG_ERROR(Core_Linker, "Unimplemented type {}", header_type); } @@ -287,8 +319,8 @@ void Module::LoadDynamicInfo() { // the given app. How exactly this is generated isn't known, however it is not necessary // to have a valid fingerprint. While an invalid fingerprint will cause a warning to be // printed to the kernel log, the ELF will still load and run. - LOG_INFO(Core_Linker, "unsupported DT_SCE_FINGERPRINT value = ..........: {:#018x}", - dyn->d_un.d_val); + LOG_INFO(Core_Linker, "DT_SCE_FINGERPRINT value = {:#018x}", dyn->d_un.d_val); + std::memcpy(info.fingerprint.data(), &dyn->d_un.d_val, sizeof(SCE_DBG_NUM_FINGERPRINT)); break; case DT_SCE_IMPORT_LIB_ATTR: // The upper 32-bits should contain the module index multiplied by 0x10000. 
The lower @@ -304,6 +336,8 @@ void Module::LoadDynamicInfo() { info.value = dyn->d_un.d_val; info.name = dynamic_info.str_table + info.name_offset; info.enc_id = EncodeId(info.id); + const std::string full_name = info.name + ".sprx"; + full_name.copy(this->info.name.data(), full_name.size()); break; }; case DT_SCE_MODULE_ATTR: @@ -321,6 +355,9 @@ void Module::LoadDynamicInfo() { LOG_INFO(Core_Linker, "unsupported dynamic tag ..........: {:#018x}", dyn->d_tag); } } + const u32 relabits_num = dynamic_info.relocation_table_size / sizeof(elf_relocation) + + dynamic_info.jmp_relocation_table_size / sizeof(elf_relocation); + rela_bits.resize((relabits_num + 7) / 8); } void Module::LoadSymbols() { @@ -384,6 +421,26 @@ void Module::LoadSymbols() { symbol_database(import_sym, false); } +OrbisKernelModuleInfoEx Module::GetModuleInfoEx() const { + return OrbisKernelModuleInfoEx{ + .name = info.name, + .tls_index = tls.modid, + .tls_init_addr = tls.image_virtual_addr, + .tls_init_size = tls.init_image_size, + .tls_size = tls.image_size, + .tls_offset = tls.offset, + .tls_align = tls.align, + .init_proc_addr = base_virtual_addr + dynamic_info.init_virtual_addr, + .fini_proc_addr = base_virtual_addr + dynamic_info.fini_virtual_addr, + .eh_frame_hdr_addr = eh_frame_hdr_addr, + .eh_frame_addr = eh_frame_addr, + .eh_frame_hdr_size = eh_frame_hdr_size, + .eh_frame_size = eh_frame_size, + .segments = info.segments, + .segment_count = info.num_segments, + }; +} + const ModuleInfo* Module::FindModule(std::string_view id) { const auto& import_modules = dynamic_info.import_modules; for (u32 i = 0; const auto& mod : import_modules) { diff --git a/src/core/module.h b/src/core/module.h index bb11439e..d4079c7a 100644 --- a/src/core/module.h +++ b/src/core/module.h @@ -11,6 +11,46 @@ namespace Core { +static constexpr size_t SCE_DBG_MAX_NAME_LENGTH = 256; +static constexpr size_t SCE_DBG_MAX_SEGMENTS = 4; +static constexpr size_t SCE_DBG_NUM_FINGERPRINT = 20; + +struct 
OrbisKernelModuleSegmentInfo { + VAddr address; + u32 size; + s32 prot; +}; + +struct OrbisKernelModuleInfo { + u64 st_size = sizeof(OrbisKernelModuleInfo); + std::array name; + std::array segments; + u32 num_segments; + std::array fingerprint; +}; + +struct OrbisKernelModuleInfoEx { + u64 st_size = sizeof(OrbisKernelModuleInfoEx); + std::array name; + s32 id; + u32 tls_index; + VAddr tls_init_addr; + u32 tls_init_size; + u32 tls_size; + u32 tls_offset; + u32 tls_align; + VAddr init_proc_addr; + VAddr fini_proc_addr; + u64 reserved1; + u64 reserved2; + VAddr eh_frame_hdr_addr; + VAddr eh_frame_addr; + u32 eh_frame_hdr_size; + u32 eh_frame_size; + std::array segments; + u32 segment_count; +}; + struct ModuleInfo { bool operator==(const ModuleInfo& other) const { return version_major == other.version_major && version_minor == other.version_minor && @@ -46,12 +86,12 @@ struct LibraryInfo { }; struct ThreadLocalImage { - u64 align; - u64 image_size; - u64 offset; + u32 align; + u32 image_size; + u32 offset; u32 modid; VAddr image_virtual_addr; - u64 init_image_size; + u32 init_image_size; }; struct DynamicModuleInfo { @@ -100,7 +140,7 @@ using ModuleFunc = int (*)(size_t, const void*); class Module { public: - explicit Module(const std::filesystem::path& file); + explicit Module(const std::filesystem::path& file, u32& max_tls_index); ~Module(); VAddr GetBaseAddress() const noexcept { @@ -111,6 +151,10 @@ public: return base_virtual_addr + elf.GetElfEntry(); } + OrbisKernelModuleInfo GetModuleInfo() const noexcept { + return info; + } + bool IsValid() const noexcept { return base_virtual_addr != 0; } @@ -151,33 +195,49 @@ public: void ForEachRelocation(auto&& func) { for (u32 i = 0; i < dynamic_info.relocation_table_size / sizeof(elf_relocation); i++) { - func(&dynamic_info.relocation_table[i], false); + func(&dynamic_info.relocation_table[i], i, false); } for (u32 i = 0; i < dynamic_info.jmp_relocation_table_size / sizeof(elf_relocation); i++) { - 
func(&dynamic_info.jmp_relocation_table[i], true); + func(&dynamic_info.jmp_relocation_table[i], i, true); } } - void Start(size_t args, const void* argp, void* param); - void LoadModuleToMemory(); + void SetRelaBit(u32 index) { + rela_bits[index >> 3] |= (1 << (index & 7)); + } + + bool TestRelaBit(u32 index) const { + return (rela_bits[index >> 3] >> (index & 7)) & 1; + } + + s32 Start(size_t args, const void* argp, void* param); + void LoadModuleToMemory(u32& max_tls_index); void LoadDynamicInfo(); void LoadSymbols(); + OrbisKernelModuleInfoEx GetModuleInfoEx() const; const ModuleInfo* FindModule(std::string_view id); const LibraryInfo* FindLibrary(std::string_view id); public: std::filesystem::path file; + std::string name; Loader::Elf elf; u64 aligned_base_size{}; VAddr base_virtual_addr{}; VAddr proc_param_virtual_addr{}; + VAddr eh_frame_hdr_addr{}; + VAddr eh_frame_addr{}; + u32 eh_frame_hdr_size{}; + u32 eh_frame_size{}; DynamicModuleInfo dynamic_info{}; std::vector m_dynamic; std::vector m_dynamic_data; Loader::SymbolsResolver export_sym; Loader::SymbolsResolver import_sym; ThreadLocalImage tls{}; + OrbisKernelModuleInfo info{}; + std::vector rela_bits; }; } // namespace Core diff --git a/src/core/tls.cpp b/src/core/tls.cpp index a6eb36fa..cf7e1584 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -125,7 +125,8 @@ static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGe const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg); c.putSeg(fs); c.mov(target_reg, qword[SelfInTcbheadOffset]); // Load self member pointer of tcbhead_t. - c.add(target_reg, SpecificFirstBlockOffset + sizeof(uintptr_t) + slot * PthreadKeyDataSize); + c.add(target_reg, SpecificFirstBlockOffset + sizeof(uintptr_t) * 2 + slot * PthreadKeyDataSize); + c.mov(target_reg, qword[target_reg]); c.jmp(code + total_size); // Return to the instruction right after the mov. 
} diff --git a/src/core/tls.h b/src/core/tls.h index 3f154420..8e546935 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -13,7 +13,7 @@ namespace Core { union DtvEntry { size_t counter; - void* pointer; + u8* pointer; }; struct Tcb { diff --git a/src/emulator.cpp b/src/emulator.cpp index b887685b..95fbbfa8 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -94,7 +94,11 @@ void Emulator::Run(const std::filesystem::path& file) { if (std::filesystem::is_directory(sce_module_folder)) { for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) { if (entry.path().filename() == "libc.prx" || - entry.path().filename() == "libSceFios2.prx") { + entry.path().filename() == "libSceFios2.prx" || + entry.path().filename() == "libSceAudioLatencyEstimation.prx" || + entry.path().filename() == "libSceJobManager.prx" || + entry.path().filename() == "libSceNpToolkit2.prx" || + entry.path().filename() == "libSceS3DConversion.prx") { found = true; LOG_INFO(Loader, "Loading {}", entry.path().string().c_str()); linker->LoadModule(entry.path()); @@ -126,7 +130,8 @@ void Emulator::Run(const std::filesystem::path& file) { void Emulator::LoadSystemModules(const std::filesystem::path& file) { const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir); for (const auto& entry : std::filesystem::directory_iterator(sys_module_path)) { - if (entry.path().filename() == "libSceNgs2.sprx") { + if (entry.path().filename() == "libSceNgs2.sprx" || + entry.path().filename() == "libSceLibcInternal.sprx") { LOG_INFO(Loader, "Loading {}", entry.path().string().c_str()); linker->LoadModule(entry.path()); } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index d3db3766..69005f1a 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -160,7 +160,7 @@ void Translator::S_OR_B64(bool negate, const 
GcnInst& inst) { } } -void Translator::S_AND_B64(const GcnInst& inst) { +void Translator::S_AND_B64(bool negate, const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { case OperandField::VccLo: @@ -175,7 +175,10 @@ void Translator::S_AND_B64(const GcnInst& inst) { }; const IR::U1 src0{get_src(inst.src[0])}; const IR::U1 src1{get_src(inst.src[1])}; - const IR::U1 result = ir.LogicalAnd(src0, src1); + IR::U1 result = ir.LogicalAnd(src0, src1); + if (negate) { + result = ir.LogicalNot(result); + } ir.SetScc(result); switch (inst.dst[0].field) { case OperandField::VccLo: diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index a32cde39..610c11e7 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -121,6 +121,9 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { case OperandField::ConstFloatNeg_2_0: value = ir.Imm32(-2.0f); break; + case OperandField::ConstFloatNeg_4_0: + value = ir.Imm32(-4.0f); + break; case OperandField::VccLo: if (force_flt) { value = ir.BitCast(ir.GetVccLo()); @@ -304,6 +307,7 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MADAK_F32: // Yes these can share the opcode translator.V_FMA_F32(inst); break; + case Opcode::IMAGE_SAMPLE_LZ_O: case Opcode::IMAGE_SAMPLE_C_LZ: case Opcode::IMAGE_SAMPLE_LZ: case Opcode::IMAGE_SAMPLE: @@ -372,6 +376,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_CMP_NLE_F32: translator.V_CMP_F32(ConditionOp::GT, false, inst); break; + case Opcode::V_CMP_NLT_F32: + translator.V_CMP_F32(ConditionOp::GE, false, inst); + break; case Opcode::S_CMP_LG_U32: translator.S_CMP(ConditionOp::LG, false, inst); break; @@ -563,7 +570,10 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) translator.S_OR_B64(true, inst); break; 
case Opcode::S_AND_B64: - translator.S_AND_B64(inst); + translator.S_AND_B64(false, inst); + break; + case Opcode::S_NAND_B64: + translator.S_AND_B64(true, inst); break; case Opcode::V_LSHRREV_B32: translator.V_LSHRREV_B32(inst); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 64d6d7f0..64e45a2c 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -41,7 +41,7 @@ public: void S_AND_SAVEEXEC_B64(const GcnInst& inst); void S_MOV_B64(const GcnInst& inst); void S_OR_B64(bool negate, const GcnInst& inst); - void S_AND_B64(const GcnInst& inst); + void S_AND_B64(bool negate, const GcnInst& inst); void S_ADD_I32(const GcnInst& inst); void S_AND_B32(const GcnInst& inst); void S_LSHR_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 2281a038..7a5bd49e 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -15,7 +15,8 @@ void Translator::V_SAD(const GcnInst& inst) { } void Translator::V_MAC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), GetSrc(inst.dst[0]))); + SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true), + GetSrc(inst.dst[0], true))); } void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { @@ -134,13 +135,13 @@ void Translator::V_FLOOR_F32(const GcnInst& inst) { } void Translator::V_SUB_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src1{GetSrc(inst.src[1], true)}; SetDst(inst.dst[0], ir.FPSub(src0, src1)); } void Translator::V_RCP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 
src0{GetSrc(inst.src[0], true)}; SetDst(inst.dst[0], ir.FPRecip(src0)); } diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index e0cf86aa..956e65a1 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -22,8 +22,6 @@ Liverpool::Liverpool() { Liverpool::~Liverpool() { process_thread.request_stop(); - num_submits = -1; - num_submits.notify_one(); process_thread.join(); } @@ -31,8 +29,10 @@ void Liverpool::Process(std::stop_token stoken) { Common::SetCurrentThreadName("GPU_CommandProcessor"); while (!stoken.stop_requested()) { - num_submits.wait(0); - + { + std::unique_lock lk{submit_mutex}; + submit_cv.wait(lk, stoken, [this] { return num_submits != 0; }); + } if (stoken.stop_requested()) { break; } @@ -67,7 +67,8 @@ void Liverpool::Process(std::stop_token stoken) { } if (submit_done) { - num_submits.notify_all(); + std::scoped_lock lk{submit_mutex}; + submit_cv.notify_all(); submit_done = false; } } @@ -76,9 +77,8 @@ void Liverpool::Process(std::stop_token stoken) { void Liverpool::WaitGpuIdle() { RENDERER_TRACE; - while (const auto old = num_submits.load()) { - num_submits.wait(old); - } + std::unique_lock lk{submit_mutex}; + submit_cv.wait(lk, [this] { return num_submits == 0; }); } Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { @@ -369,7 +369,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(opcode), count); } - dcb = dcb.subspan(header->type3.NumWords() + 1); } @@ -415,8 +414,9 @@ void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { queue.submits.emplace(task.handle); } + std::scoped_lock lk{submit_mutex}; ++num_submits; - num_submits.notify_one(); + submit_cv.notify_one(); } void Liverpool::SubmitAsc(u32 vqid, std::span acb) { @@ -429,8 +429,9 @@ void Liverpool::SubmitAsc(u32 vqid, std::span acb) { queue.submits.emplace(task.handle); } + std::scoped_lock lk{submit_mutex}; ++num_submits; - num_submits.notify_one(); + submit_cv.notify_one(); } } 
// namespace AmdGpu diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 0a427c91..dd717990 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -10,6 +10,7 @@ #include "video_core/amdgpu/pixel_format.h" #include +#include #include #include #include @@ -479,9 +480,9 @@ struct Liverpool { }; struct Scissor { - union { - BitField<0, 16, s32> top_left_x; - BitField<16, 16, s32> top_left_y; + struct { + s16 top_left_x; + s16 top_left_y; }; union { BitField<0, 15, u32> bottom_right_x; @@ -865,13 +866,15 @@ public: void SubmitAsc(u32 vqid, std::span acb); void WaitGpuIdle(); + bool IsGpuIdle() const { return num_submits == 0; } void NotifySubmitDone() { + std::scoped_lock lk{submit_mutex}; submit_done = true; - num_submits.notify_all(); + submit_cv.notify_all(); } void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { @@ -939,7 +942,9 @@ private: Vulkan::Rasterizer* rasterizer{}; std::jthread process_thread{}; - std::atomic num_submits{}; + u32 num_submits{}; + std::mutex submit_mutex; + std::condition_variable_any submit_cv; std::atomic submit_done{}; }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 64721b62..cb15080f 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -281,7 +281,8 @@ struct Sampler { }; float LodBias() const noexcept { - return static_cast(lod_bias); + return static_cast(static_cast((lod_bias.Value() ^ 0x2000u) - 0x2000u)) / + 256.0f; } float MinLod() const noexcept { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 8ca82f82..0096d34b 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -347,6 +347,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == 
AmdGpu::NumberFormat::Unorm) { return vk::Format::eR8G8Unorm; } + if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc2UnormBlock; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } @@ -367,6 +370,10 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format == DepthBuffer::StencilFormat::Stencil8) { return vk::Format::eD16UnormS8Uint; } + if (z_format == DepthBuffer::ZFormat::Invald && + stencil_format == DepthBuffer::StencilFormat::Invalid) { + return vk::Format::eUndefined; + } UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 87fb447b..dc986249 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -9,6 +9,7 @@ #include "sdl_window.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/texture_cache/image.h" #include @@ -199,6 +200,11 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr return PrepareFrameInternal(image); } +Frame* RendererVulkan::PrepareBlankFrame() { + auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); + return PrepareFrameInternal(image); +} + Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { // Request a free presentation frame. 
Frame* frame = GetRenderFrame(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index a3627584..f4b1a608 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -39,6 +39,7 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address); + Frame* PrepareBlankFrame(); bool ShowSplash(Frame* frame = nullptr); void Present(Frame* frame); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index c851aa9d..7d1a980c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,7 +36,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler for (const auto& image : info.images) { bindings.push_back({ .binding = binding++, - .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eCompute, }); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8f438020..4a811eba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -78,7 +78,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .depthClampEnable = false, .rasterizerDiscardEnable = false, .polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode), - .cullMode = LiverpoolToVK::CullMode(key.cull_mode), + .cullMode = vk::CullModeFlagBits::eNone /*LiverpoolToVK::CullMode(key.cull_mode)*/, .frontFace = key.front_face == Liverpool::FrontFace::Clockwise ? 
vk::FrontFace::eClockwise : vk::FrontFace::eCounterClockwise, @@ -289,7 +289,8 @@ void GraphicsPipeline::BuildDescSetLayout() { for (const auto& image : stage.images) { bindings.push_back({ .binding = binding++, - .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, }); @@ -316,8 +317,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& BindVertexBuffers(staging); // Bind resource buffers and textures. - boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; + boost::container::static_vector buffer_infos; + boost::container::static_vector image_infos; boost::container::small_vector set_writes; u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 5db40524..4b38aa3d 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -79,6 +79,10 @@ public: return key.write_masks; } + [[nodiscard]] bool IsDepthEnabled() const { + return key.depth.depth_enable.Value(); + } + private: void BuildDescSetLayout(); void BindVertexBuffers(StreamBuffer& staging) const; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 141ac635..139f7715 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -6,6 +6,7 @@ #include "common/io_file.h" #include "common/path_util.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/exception.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ 
-88,6 +89,8 @@ void PipelineCache::RefreshGraphicsKey() { auto& key = graphics_key; key.depth = regs.depth_control; + key.depth.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() && + !regs.depth_render_control.depth_clear_enable); key.depth_bounds_min = regs.depth_bounds_min; key.depth_bounds_max = regs.depth_bounds_max; key.depth_bias_enable = regs.polygon_control.enable_polygon_offset_back || @@ -111,9 +114,10 @@ void PipelineCache::RefreshGraphicsKey() { key.front_face = regs.polygon_control.front_face; const auto& db = regs.depth_buffer; - key.depth_format = key.depth.depth_enable - ? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) - : vk::Format::eUndefined; + if (key.depth.depth_enable) { + key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format); + key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); + } // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. // We need to do some arrays compaction at this stage @@ -180,6 +184,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { inst_pool.ReleaseContents(); // Recompile shader to IR. 
+ LOG_INFO(Render_Vulkan, "Compiling {} shader {:#X}", stage, hash); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index abb6d328..37b3f79b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -62,7 +62,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { .storeOp = vk::AttachmentStoreOp::eStore, }); } - if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) { + if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) { const bool is_clear = regs.depth_render_control.depth_clear_enable; const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent); diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index b42a2bb1..1b577046 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -88,6 +88,8 @@ private: vk::Image image{}; }; +constexpr SlotId NULL_IMAGE_ID{0}; + struct Image { explicit Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, const ImageInfo& info, VAddr cpu_addr); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 1d3e5e21..59fb47b5 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -46,17 +46,20 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { } } -ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept { +ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept + : is_storage{is_storage} { type = ConvertImageViewType(image.type); format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); range.base.level = 
0; range.base.layer = 0; range.extent.levels = image.NumLevels(); range.extent.layers = image.NumLayers(); - mapping.r = ConvertComponentSwizzle(image.dst_sel_x); - mapping.g = ConvertComponentSwizzle(image.dst_sel_y); - mapping.b = ConvertComponentSwizzle(image.dst_sel_z); - mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + if (!is_storage) { + mapping.r = ConvertComponentSwizzle(image.dst_sel_x); + mapping.g = ConvertComponentSwizzle(image.dst_sel_y); + mapping.b = ConvertComponentSwizzle(image.dst_sel_z); + mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + } } ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, @@ -74,7 +77,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info } const vk::ImageViewCreateInfo image_view_ci = { - .pNext = usage_override.has_value() ? &usage_ci : nullptr, + .pNext = nullptr, .image = image.image, .viewType = info.type, .format = format, diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 2e15e1a1..83936acc 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -18,12 +18,13 @@ namespace VideoCore { struct ImageViewInfo { explicit ImageViewInfo() = default; - explicit ImageViewInfo(const AmdGpu::Image& image) noexcept; + explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; vk::ImageViewType type = vk::ImageViewType::e2D; vk::Format format = vk::Format::eR8G8B8A8Unorm; SubresourceRange range; vk::ComponentMapping mapping{}; + bool is_storage; auto operator<=>(const ImageViewInfo&) const = default; }; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index fca79f49..8c910c03 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -169,14 +169,14 @@ ImageView& TextureCache::FindImageView(const 
AmdGpu::Image& desc, bool is_storag image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead); } - const ImageViewInfo view_info{desc}; + const ImageViewInfo view_info{desc, is_storage}; return RegisterImageView(image, view_info); } ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; - auto& image = FindImage(info, buffer.Address()); + auto& image = FindImage(info, buffer.Address(), false); image.flags &= ~ImageFlagBits::CpuModified; image.Transit(vk::ImageLayout::eColorAttachmentOptimal, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1722fc20..1d5aafa2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -55,6 +55,11 @@ public: /// Retrieves the sampler that matches the provided S# descriptor. [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); + /// Retrieves the image with the specified id. + [[nodiscard]] Image& GetImage(ImageId id) { + return slot_images[id]; + } + private: ImageView& RegisterImageView(Image& image, const ImageViewInfo& view_info);