From a2394109ee09ffac5d415f1fdba0f8c74bb7c374 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 17:54:36 +0300 Subject: [PATCH 1/9] kernel: Cleanup filesystem code --- src/core/libraries/kernel/file_system.cpp | 111 +++++++--------------- 1 file changed, 35 insertions(+), 76 deletions(-) diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 3ef4b6a0..8ec8b167 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "common/singleton.h" +#include "common/scope_exit.h" #include "core/file_sys/fs.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/file_system.h" @@ -12,26 +13,16 @@ namespace Libraries::Kernel { std::vector GetDirectoryEntries(const std::string& path) { - std::string curpath = path; - if (!curpath.ends_with("/")) { - curpath = std::string(curpath + "/"); - } std::vector files; - - for (const auto& entry : std::filesystem::directory_iterator(curpath)) { - Core::FileSys::DirEntry e = {}; - if (std::filesystem::is_directory(entry.path().string())) { - e.name = entry.path().filename().string(); - e.isFile = false; - } else { - e.name = entry.path().filename().string(); - e.isFile = true; - } - files.push_back(e); + for (const auto& entry : std::filesystem::directory_iterator(path)) { + auto& dir_entry = files.emplace_back(); + dir_entry.name = entry.path().filename().string(); + dir_entry.isFile = !std::filesystem::is_directory(entry.path().string()); } return files; } + int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode); auto* h = Common::Singleton::Instance(); @@ -136,9 +127,6 @@ int PS4_SYSV_ABI posix_close(int d) { } size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) { - if (buf == nullptr) { - return SCE_KERNEL_ERROR_EFAULT; - } if (d <= 2) { // stdin,stdout,stderr char* str = strdup((const char*)buf); if (str[nbytes - 1] == '\n') @@ -152,20 +140,19 @@ size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) { if (file == nullptr) { return SCE_KERNEL_ERROR_EBADF; } - file->m_mutex.lock(); - u32 bytes_write = file->f.WriteRaw(buf, static_cast(nbytes)); - file->m_mutex.unlock(); - return bytes_write; + + std::scoped_lock lk{file->m_mutex}; + return file->f.WriteRaw(buf, nbytes); } + size_t PS4_SYSV_ABI _readv(int d, const SceKernelIovec* iov, int iovcnt) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); - size_t total_read = 0; - file->m_mutex.lock(); + size_t total_read = 0; + std::scoped_lock lk{file->m_mutex}; for (int i = 0; i < iovcnt; i++) { total_read += file->f.ReadRaw(iov[i].iov_base, iov[i].iov_len); } - file->m_mutex.unlock(); return total_read; } @@ -173,24 +160,18 @@ s64 PS4_SYSV_ABI sceKernelLseek(int d, s64 offset, int whence) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); - file->m_mutex.lock(); - Common::FS::SeekOrigin origin; + Common::FS::SeekOrigin origin{}; if (whence == 0) { origin = Common::FS::SeekOrigin::SetOrigin; - } - - if (whence == 1) { + } else if (whence == 1) { origin = Common::FS::SeekOrigin::CurrentPosition; - } - if (whence == 2) { + } else if (whence == 2) { origin = Common::FS::SeekOrigin::End; } + std::scoped_lock lk{file->m_mutex}; file->f.Seek(offset, origin); - auto pos = static_cast(file->f.Tell()); - - file->m_mutex.unlock(); - return pos; + return file->f.Tell(); } s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) { @@ -198,19 +179,14 @@ s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) { } s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) { - if (buf == nullptr) { - return SCE_KERNEL_ERROR_EFAULT; - } - auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); if (file == nullptr) { return SCE_KERNEL_ERROR_EBADF; } - file->m_mutex.lock(); - u32 bytes_read = file->f.ReadRaw(buf, static_cast(nbytes)); - file->m_mutex.unlock(); - return bytes_read; + + std::scoped_lock lk{file->m_mutex}; + return file->f.ReadRaw(buf, nbytes); } int PS4_SYSV_ABI posix_read(int d, void* buf, size_t nbytes) { @@ -245,10 +221,10 @@ int PS4_SYSV_ABI posix_mkdir(const char* path, u16 mode) { int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) { LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path); auto* mnt = Common::Singleton::Instance(); - std::string path_name = mnt->GetHostFile(path); - memset(sb, 0, sizeof(OrbisKernelStat)); - bool is_dir = std::filesystem::is_directory(path_name); - bool is_file = std::filesystem::is_regular_file(path_name); + const auto& path_name = mnt->GetHostFile(path); + std::memset(sb, 0, sizeof(OrbisKernelStat)); + const bool is_dir = std::filesystem::is_directory(path_name); + const bool is_file = std::filesystem::is_regular_file(path_name); if (!is_dir && !is_file) { return ORBIS_KERNEL_ERROR_ENOENT; } @@ -290,35 +266,28 @@ s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) { if (d < 3) { return ORBIS_KERNEL_ERROR_EPERM; } - - if (buf == nullptr) { - return ORBIS_KERNEL_ERROR_EFAULT; - } - if (offset < 0) { return ORBIS_KERNEL_ERROR_EINVAL; } auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); - if (file == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } - file->m_mutex.lock(); + + std::scoped_lock lk{file->m_mutex}; if (file->f.Tell() != offset) { file->f.Seek(offset); } - u32 bytes_read = file->f.ReadRaw(buf, static_cast(nbytes)); - file->m_mutex.unlock(); - return bytes_read; + return file->f.ReadRaw(buf, nbytes); } int PS4_SYSV_ABI sceKernelFStat(int fd, OrbisKernelStat* sb) { LOG_INFO(Kernel_Fs, "(PARTIAL) fd = {}", fd); auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(fd); - memset(sb, 0, sizeof(OrbisKernelStat)); + std::memset(sb, 0, sizeof(OrbisKernelStat)); if (file->is_directory) { sb->st_mode = 0000777u | 0040000u; @@ -347,13 +316,14 @@ s32 PS4_SYSV_ABI sceKernelFsync(int fd) { return ORBIS_OK; } -int GetDents(int fd, char* buf, int nbytes, s64* basep) { +static int GetDents(int fd, char* buf, int nbytes, s64* basep) { // TODO error codes + ASSERT(buf != nullptr); auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(fd); if (file->dirents_index == file->dirents.size()) { - return 0; + return ORBIS_OK; } const auto& entry = file->dirents.at(file->dirents_index++); @@ -388,31 +358,20 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { if (d < 3) { return ORBIS_KERNEL_ERROR_EPERM; } - - if (buf == nullptr) { - return ORBIS_KERNEL_ERROR_EFAULT; - } - if (offset < 0) { return ORBIS_KERNEL_ERROR_EINVAL; } auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); - if (file == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } - file->m_mutex.lock(); - - auto pos = file->f.Tell(); - file->f.Seek(offset); - u32 bytes_write = file->f.WriteRaw(buf, static_cast(nbytes)); - file->f.Seek(pos); - file->m_mutex.unlock(); - - return bytes_write; + std::scoped_lock lk{file->m_mutex}; + const s64 pos = file->f.Tell(); + SCOPE_EXIT { file->f.Seek(pos); }; + return file->f.Seek(offset) && file->f.WriteRaw(buf, nbytes); } void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { From 9bd389882943c3934b3b8587c975af26c5708075 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 17:57:18 +0300 Subject: [PATCH 2/9] kernel: Rework semaphore implementation --- .../libraries/kernel/threads/semaphore.cpp | 113 +++++++++++------- 1 file changed, 72 insertions(+), 41 deletions(-) diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index e6fc667d..63c33de7 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -3,13 +3,14 @@ #include #include +#include #include #include #include "common/assert.h" #include "common/logging/log.h" -#include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" +#include "core/libraries/kernel/thread_management.h" namespace Libraries::Kernel { @@ -18,26 +19,30 @@ using ListBaseHook = class Semaphore { public: - Semaphore(s32 init_count, s32 max_count, const char* name, bool is_fifo) - : name{name}, token_count{init_count}, max_count{max_count}, is_fifo{is_fifo} {} + Semaphore(s32 init_count, s32 max_count, std::string_view name, bool is_fifo) + : name{name}, token_count{init_count}, max_count{max_count}, + init_count{init_count}, is_fifo{is_fifo} {} + ~Semaphore() { + ASSERT(wait_list.empty()); + } - bool Wait(bool can_block, s32 need_count, u64* timeout) { - if (HasAvailableTokens(need_count)) { - return true; + int Wait(bool can_block, s32 need_count, u32* timeout) { + std::unique_lock lk{mutex}; + if (token_count >= need_count) { + token_count -= need_count; + return ORBIS_OK; } if (!can_block) { - return false; + return ORBIS_KERNEL_ERROR_EBUSY; } // Create waiting thread object and add it into the list of waiters. WaitingThread waiter{need_count, is_fifo}; AddWaiter(waiter); - SCOPE_EXIT { - PopWaiter(waiter); - }; // Perform the wait. - return waiter.Wait(timeout); + std::exchange(lk, std::unique_lock{waiter.mutex}); + return waiter.Wait(lk, timeout); } bool Signal(s32 signal_count) { @@ -48,25 +53,47 @@ public: token_count += signal_count; // Wake up threads in order of priority. - for (auto& waiter : wait_list) { + for (auto it = wait_list.begin(); it != wait_list.end();) { + auto& waiter = *it; if (waiter.need_count > token_count) { + it++; continue; } + std::scoped_lock lk2{waiter.mutex}; token_count -= waiter.need_count; waiter.cv.notify_one(); + it = wait_list.erase(it); } return true; } -private: + int Cancel(s32 set_count, s32* num_waiters) { + std::scoped_lock lk{mutex}; + if (num_waiters) { + *num_waiters = wait_list.size(); + } + for (auto& waiter : wait_list) { + waiter.was_cancled = true; + waiter.cv.notify_one(); + } + wait_list.clear(); + token_count = set_count < 0 ? init_count : set_count; + return ORBIS_OK; + } + +public: struct WaitingThread : public ListBaseHook { std::mutex mutex; + std::string name; std::condition_variable cv; u32 priority; s32 need_count; + bool was_deleted{}; + bool was_cancled{}; explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} { + name = scePthreadSelf()->name; if (is_fifo) { return; } @@ -77,12 +104,24 @@ private: priority = param.sched_priority; } - bool Wait(u64* timeout) { - std::unique_lock lk{mutex}; + int GetResult(bool timed_out) { + if (timed_out) { + return SCE_KERNEL_ERROR_ETIMEDOUT; + } + if (was_deleted) { + return SCE_KERNEL_ERROR_EACCES; + } + if (was_cancled) { + return SCE_KERNEL_ERROR_ECANCELED; + } + return SCE_OK; + } + + int Wait(std::unique_lock& lk, u32* timeout) { if (!timeout) { // Wait indefinitely until we are woken up. cv.wait(lk); - return true; + return GetResult(false); } // Wait until timeout runs out, recording how much remaining time there was. const auto start = std::chrono::high_resolution_clock::now(); @@ -91,16 +130,11 @@ private: const auto time = std::chrono::duration_cast(end - start).count(); *timeout -= time; - return status != std::cv_status::timeout; - } - - bool operator<(const WaitingThread& other) const { - return priority < other.priority; + return GetResult(status == std::cv_status::timeout); } }; void AddWaiter(WaitingThread& waiter) { - std::scoped_lock lk{mutex}; // Insert at the end of the list for FIFO order. if (is_fifo) { wait_list.push_back(waiter); @@ -114,20 +148,6 @@ private: wait_list.insert(it, waiter); } - void PopWaiter(WaitingThread& waiter) { - std::scoped_lock lk{mutex}; - wait_list.erase(WaitingThreads::s_iterator_to(waiter)); - } - - bool HasAvailableTokens(s32 need_count) { - std::scoped_lock lk{mutex}; - if (token_count >= need_count) { - token_count -= need_count; - return true; - } - return false; - } - using WaitingThreads = boost::intrusive::list, boost::intrusive::constant_time_size>; @@ -136,6 +156,7 @@ private: std::atomic token_count; std::mutex mutex; s32 max_count; + s32 init_count; bool is_fifo; }; @@ -151,9 +172,8 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3 return ORBIS_OK; } -s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u64* pTimeout) { - ASSERT(sem->Wait(true, needCount, pTimeout)); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) { + return sem->Wait(true, needCount, pTimeout); } s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { @@ -164,9 +184,18 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { } s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { - if (!sem->Wait(false, needCount, nullptr)) { - return ORBIS_KERNEL_ERROR_EBUSY; + return sem->Wait(false, needCount, nullptr); +} + +int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32 *pNumWaitThreads) { + return sem->Cancel(setCount, pNumWaitThreads); +} + +int PS4_SYSV_ABI sceKernelDeleteSema(OrbisKernelSema sem) { + if (!sem) { + return SCE_KERNEL_ERROR_ESRCH; } + delete sem; return ORBIS_OK; } @@ -175,6 +204,8 @@ void SemaphoreSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Zxa0VhQVTsk", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitSema); LIB_FUNCTION("4czppHBiriw", "libkernel", 1, "libkernel", 1, 1, sceKernelSignalSema); LIB_FUNCTION("12wOHk8ywb0", "libkernel", 1, "libkernel", 1, 1, sceKernelPollSema); + LIB_FUNCTION("4DM06U2BNEY", "libkernel", 1, "libkernel", 1, 1, sceKernelCancelSema); + LIB_FUNCTION("R1Jvn8bSCW8", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteSema); } } // namespace Libraries::Kernel From 4846704832627508a04c85ccde0135db81f4fdcf Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:03:09 +0300 Subject: [PATCH 3/9] shader_recompiler: More instructions and fix for swords of ditto --- .../frontend/translate/scalar_alu.cpp | 19 +++++++++ .../frontend/translate/translate.cpp | 41 ++++++++++++++++++- .../frontend/translate/translate.h | 2 + .../ir/passes/resource_tracking_pass.cpp | 7 +++- .../renderer_vulkan/liverpool_to_vk.cpp | 3 ++ 5 files changed, 69 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index b85917b6..c090d8ce 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -196,6 +196,9 @@ void Translator::S_AND_B64(bool negate, const GcnInst& inst) { case OperandField::ScalarGPR: ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result); break; + case OperandField::ExecLo: + ir.SetExec(result); + break; default: UNREACHABLE(); } @@ -325,4 +328,20 @@ void Translator::S_BREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0]))); } +void Translator::S_ADD_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IAdd(src0, src1)); + // TODO: Carry out + ir.SetScc(ir.Imm1(false)); +} + +void Translator::S_SUB_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ISub(src0, src1)); + // TODO: Carry out + ir.SetScc(ir.Imm1(false)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 2d1679f3..6867591a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -105,7 +105,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { } break; case OperandField::ConstFloatPos_1_0: - value = ir.Imm32(1.f); + if (force_flt) { + value = ir.Imm32(1.f); + } else { + value = ir.Imm32(std::bit_cast(1.f)); + } break; case OperandField::ConstFloatPos_0_5: value = ir.Imm32(0.5f); @@ -274,6 +278,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_LOAD_DWORDX8: translator.S_LOAD_DWORD(8, inst); break; + case Opcode::S_LOAD_DWORDX16: + translator.S_LOAD_DWORD(16, inst); + break; case Opcode::S_BUFFER_LOAD_DWORD: translator.S_BUFFER_LOAD_DWORD(1, inst); break; @@ -437,9 +444,18 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::BUFFER_LOAD_FORMAT_X: translator.BUFFER_LOAD_FORMAT(1, false, inst); break; + case Opcode::BUFFER_LOAD_FORMAT_XYZ: + translator.BUFFER_LOAD_FORMAT(3, false, inst); + break; + case Opcode::BUFFER_LOAD_FORMAT_XYZW: + translator.BUFFER_LOAD_FORMAT(4, false, inst); + break; case Opcode::BUFFER_STORE_FORMAT_X: translator.BUFFER_STORE_FORMAT(1, false, inst); break; + case Opcode::BUFFER_STORE_FORMAT_XYZW: + translator.BUFFER_STORE_FORMAT(4, false, inst); + break; case Opcode::V_MAX_F32: translator.V_MAX_F32(inst); break; @@ -696,6 +712,29 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_BREV_B32: translator.S_BREV_B32(inst); break; + case Opcode::S_ADD_U32: + translator.S_ADD_U32(inst); + break; + case Opcode::S_SUB_U32: + translator.S_SUB_U32(inst); + break; + // TODO: Separate implementation for legacy variants. + case Opcode::V_MUL_LEGACY_F32: + translator.V_MUL_F32(inst); + break; + case Opcode::V_MAC_LEGACY_F32: + translator.V_MAC_F32(inst); + break; + case Opcode::V_MAD_LEGACY_F32: + translator.V_MAD_F32(inst); + break; + case Opcode::V_RSQ_LEGACY_F32: + case Opcode::V_RSQ_CLAMP_F32: + translator.V_RSQ_F32(inst); + break; + case Opcode::V_RCP_IFLAG_F32: + translator.V_RCP_F32(inst); + break; case Opcode::S_TTRACEDATA: LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); break; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 9c6d6a30..ace9042e 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -54,6 +54,8 @@ public: void S_BFM_B32(const GcnInst& inst); void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); + void S_ADD_U32(const GcnInst& inst); + void S_SUB_U32(const GcnInst& inst); // Scalar Memory void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index a1f599ba..8e1c186c 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -315,8 +315,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip const u32 arg_pos = inst_info.is_depth ? 5 : 4; inst.SetArg(arg_pos, arg); } - if (inst_info.explicit_lod && inst.GetOpcode() == IR::Opcode::ImageFetch) { - inst.SetArg(3, arg); + if (inst_info.explicit_lod) { + ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch || + inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod); + const u32 pos = inst.GetOpcode() == IR::Opcode::ImageFetch ? 3 : 2; + inst.SetArg(pos, arg); } } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index f82a976a..384d3167 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -354,6 +354,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc2UnormBlock; } + if (data_format == AmdGpu::DataFormat::Format16_16 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eR16G16Snorm; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } From 3a80df007eefaf8e7a85a313a0e5e97a3d7e0dd4 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:04:28 +0300 Subject: [PATCH 4/9] linker: More null check --- src/core/linker.cpp | 6 ++++-- src/core/memory.cpp | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 1e1d4301..09526b53 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -68,8 +68,10 @@ void Linker::Execute() { } // Configure used flexible memory size. - if (u64* flexible_size = GetProcParam()->mem_param->flexible_memory_size) { - memory->SetTotalFlexibleSize(*flexible_size); + if (auto* mem_param = GetProcParam()->mem_param) { + if (u64* flexible_size = mem_param->flexible_memory_size) { + memory->SetTotalFlexibleSize(*flexible_size); + } } // Init primary thread. diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 58593fd7..9ebd1fea 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -206,9 +206,15 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr const auto& vma = it->second; ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped"); - *start = reinterpret_cast(vma.base); - *end = reinterpret_cast(vma.base + vma.size); - *prot = static_cast(vma.prot); + if (start != nullptr) { + *start = reinterpret_cast(vma.base); + } + if (end != nullptr) { + *end = reinterpret_cast(vma.base + vma.size); + } + if (prot != nullptr) { + *prot = static_cast(vma.prot); + } return ORBIS_OK; } From e94149340e151fce68bb42ec6603a433b93a76d7 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:09:58 +0300 Subject: [PATCH 5/9] kernel: Fix pread and pwrite --- src/core/libraries/kernel/file_system.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 8ec8b167..27a6ccf3 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -277,9 +277,9 @@ s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) { } std::scoped_lock lk{file->m_mutex}; - if (file->f.Tell() != offset) { - file->f.Seek(offset); - } + const s64 pos = file->f.Tell(); + SCOPE_EXIT { file->f.Seek(pos); }; + file->f.Seek(offset); return file->f.ReadRaw(buf, nbytes); } @@ -371,7 +371,8 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { std::scoped_lock lk{file->m_mutex}; const s64 pos = file->f.Tell(); SCOPE_EXIT { file->f.Seek(pos); }; - return file->f.Seek(offset) && file->f.WriteRaw(buf, nbytes); + file->f.Seek(offset); + return file->f.WriteRaw(buf, nbytes); } void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { From c081663aac290a8fa3f88b9b0663ee3d0b274cb5 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:16:01 +0300 Subject: [PATCH 6/9] translator: Merge ANDN2 with AND and impl ORN2 --- .../frontend/translate/scalar_alu.cpp | 52 +++++-------------- .../frontend/translate/translate.cpp | 13 +++-- .../frontend/translate/translate.h | 11 ++-- 3 files changed, 28 insertions(+), 48 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index c090d8ce..9e0d78c4 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -46,40 +46,6 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) { ir.SetScc(result); } -void Translator::S_ANDN2_B64(const GcnInst& inst) { - // TODO: What if this is used for something other than EXEC masking? - const auto get_src = [&](const InstOperand& operand) { - switch (operand.field) { - case OperandField::VccLo: - return ir.GetVcc(); - case OperandField::ExecLo: - return ir.GetExec(); - case OperandField::ScalarGPR: - return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); - default: - UNREACHABLE(); - } - }; - - const IR::U1 src0{get_src(inst.src[0])}; - const IR::U1 src1{get_src(inst.src[1])}; - const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))}; - ir.SetScc(result); - switch (inst.dst[0].field) { - case OperandField::VccLo: - ir.SetVcc(result); - break; - case OperandField::ExecLo: - ir.SetExec(result); - break; - case OperandField::ScalarGPR: - ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result); - break; - default: - UNREACHABLE(); - } -} - void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination @@ -138,7 +104,7 @@ void Translator::S_MOV_B64(const GcnInst& inst) { } } -void Translator::S_OR_B64(bool negate, const GcnInst& inst) { +void Translator::S_OR_B64(NegateMode negate, const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { case OperandField::VccLo: @@ -151,9 +117,12 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) { }; const IR::U1 src0{get_src(inst.src[0])}; - const IR::U1 src1{get_src(inst.src[1])}; + IR::U1 src1{get_src(inst.src[1])}; + if (negate == NegateMode::Src1) { + src1 = ir.LogicalNot(src1); + } IR::U1 result = ir.LogicalOr(src0, src1); - if (negate) { + if (negate == NegateMode::Result) { result = ir.LogicalNot(result); } ir.SetScc(result); @@ -169,7 +138,7 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) { } } -void Translator::S_AND_B64(bool negate, const GcnInst& inst) { +void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { case OperandField::VccLo: @@ -183,9 +152,12 @@ void Translator::S_AND_B64(bool negate, const GcnInst& inst) { } }; const IR::U1 src0{get_src(inst.src[0])}; - const IR::U1 src1{get_src(inst.src[1])}; + IR::U1 src1{get_src(inst.src[1])}; + if (negate == NegateMode::Src1) { + src1 = ir.LogicalNot(src1); + } IR::U1 result = ir.LogicalAnd(src0, src1); - if (negate) { + if (negate == NegateMode::Result) { result = ir.LogicalNot(result); } ir.SetScc(result); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 6867591a..0b61be9b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -469,7 +469,10 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) translator.V_RSQ_F32(inst); break; case Opcode::S_ANDN2_B64: - translator.S_ANDN2_B64(inst); + translator.S_AND_B64(NegateMode::Src1, inst); + break; + case Opcode::S_ORN2_B64: + translator.S_OR_B64(NegateMode::Src1, inst); break; case Opcode::V_SIN_F32: translator.V_SIN_F32(inst); @@ -608,19 +611,19 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) translator.V_CMP_U32(ConditionOp::TRU, false, true, inst); break; case Opcode::S_OR_B64: - translator.S_OR_B64(false, inst); + translator.S_OR_B64(NegateMode::None, inst); break; case Opcode::S_NOR_B64: - translator.S_OR_B64(true, inst); + translator.S_OR_B64(NegateMode::Result, inst); break; case Opcode::S_AND_B64: - translator.S_AND_B64(false, inst); + translator.S_AND_B64(NegateMode::None, inst); break; case Opcode::S_NOT_B64: translator.S_NOT_B64(inst); break; case Opcode::S_NAND_B64: - translator.S_AND_B64(true, inst); + translator.S_AND_B64(NegateMode::Result, inst); break; case Opcode::V_LSHRREV_B32: translator.V_LSHRREV_B32(inst); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index ace9042e..4f10c49a 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -26,6 +26,12 @@ enum class ConditionOp : u32 { TRU, }; +enum class NegateMode : u32 { + None, + Src1, + Result, +}; + class Translator { public: explicit Translator(IR::Block* block_, Info& info); @@ -38,11 +44,10 @@ public: void S_MOV(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst); - void S_ANDN2_B64(const GcnInst& inst); void S_AND_SAVEEXEC_B64(const GcnInst& inst); void S_MOV_B64(const GcnInst& inst); - void S_OR_B64(bool negate, const GcnInst& inst); - void S_AND_B64(bool negate, const GcnInst& inst); + void S_OR_B64(NegateMode negate, const GcnInst& inst); + void S_AND_B64(NegateMode negate, const GcnInst& inst); void S_ADD_I32(const GcnInst& inst); void S_AND_B32(const GcnInst& inst); void S_OR_B32(const GcnInst& inst); From c8ed338d5a9a4f66b3892278dd6bca5ec7492826 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:17:39 +0300 Subject: [PATCH 7/9] kernel: Const correctness --- src/common/io_file.h | 2 +- src/core/libraries/kernel/file_system.cpp | 2 +- src/core/module.cpp | 1 - src/shader_recompiler/frontend/translate/translate.cpp | 1 + 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/io_file.h b/src/common/io_file.h index e57a5a78..c6dd8082 100644 --- a/src/common/io_file.h +++ b/src/common/io_file.h @@ -181,7 +181,7 @@ public: } template - size_t WriteRaw(void* data, size_t size) const { + size_t WriteRaw(const void* data, size_t size) const { return std::fwrite(data, sizeof(T), size, file); } diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 27a6ccf3..d4fcb294 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -126,7 +126,7 @@ int PS4_SYSV_ABI posix_close(int d) { return ORBIS_OK; } -size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) { +size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) { if (d <= 2) { // stdin,stdout,stderr char* str = strdup((const char*)buf); if (str[nbytes - 1] == '\n') diff --git a/src/core/module.cpp b/src/core/module.cpp index 1353637e..86893f61 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -11,7 +11,6 @@ #include "core/memory.h" #include "core/module.h" #include "core/tls.h" -#include "core/virtual_memory.h" namespace Core { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 0b61be9b..b7593c1a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -331,6 +331,7 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::IMAGE_SAMPLE_C_LZ: case Opcode::IMAGE_SAMPLE_LZ: case Opcode::IMAGE_SAMPLE: + case Opcode::IMAGE_SAMPLE_L: translator.IMAGE_SAMPLE(inst); break; case Opcode::IMAGE_STORE: From 550bfa1c88a748c314c3914efda8317e2120a752 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 26 Jun 2024 20:00:09 +0300 Subject: [PATCH 8/9] liverpool: Fix assert for compute queues --- src/video_core/amdgpu/liverpool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 38d27410..6e0aca43 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -485,7 +485,7 @@ void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { } void Liverpool::SubmitAsc(u32 vqid, std::span acb) { - ASSERT_MSG(vqid > 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index"); + ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index"); auto& queue = mapped_queues[vqid]; const auto& task = ProcessCompute(acb); From 521ff4d14b63d51a084eea2d6ebf92b2ed53ab1e Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Wed, 26 Jun 2024 20:32:08 +0300 Subject: [PATCH 9/9] clang format fixes --- src/core/libraries/kernel/file_system.cpp | 12 ++++++++---- src/core/libraries/kernel/threads/semaphore.cpp | 8 ++++---- src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 3 ++- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index d4fcb294..0adb058e 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -3,8 +3,8 @@ #include "common/assert.h" #include "common/logging/log.h" -#include "common/singleton.h" #include "common/scope_exit.h" +#include "common/singleton.h" #include "core/file_sys/fs.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/file_system.h" @@ -148,7 +148,7 @@ size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) { size_t PS4_SYSV_ABI _readv(int d, const SceKernelIovec* iov, int iovcnt) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); - size_t total_read = 0; + size_t total_read = 0; std::scoped_lock lk{file->m_mutex}; for (int i = 0; i < iovcnt; i++) { total_read += file->f.ReadRaw(iov[i].iov_base, iov[i].iov_len); @@ -278,7 +278,9 @@ s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) { std::scoped_lock lk{file->m_mutex}; const s64 pos = file->f.Tell(); - SCOPE_EXIT { file->f.Seek(pos); }; + SCOPE_EXIT { + file->f.Seek(pos); + }; file->f.Seek(offset); return file->f.ReadRaw(buf, nbytes); } @@ -370,7 +372,9 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { std::scoped_lock lk{file->m_mutex}; const s64 pos = file->f.Tell(); - SCOPE_EXIT { file->f.Seek(pos); }; + SCOPE_EXIT { + file->f.Seek(pos); + }; file->f.Seek(offset); return file->f.WriteRaw(buf, nbytes); } diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index 63c33de7..ba8d6300 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -9,8 +9,8 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" -#include "core/libraries/libs.h" #include "core/libraries/kernel/thread_management.h" +#include "core/libraries/libs.h" namespace Libraries::Kernel { @@ -20,8 +20,8 @@ using ListBaseHook = class Semaphore { public: Semaphore(s32 init_count, s32 max_count, std::string_view name, bool is_fifo) - : name{name}, token_count{init_count}, max_count{max_count}, - init_count{init_count}, is_fifo{is_fifo} {} + : name{name}, token_count{init_count}, max_count{max_count}, init_count{init_count}, + is_fifo{is_fifo} {} ~Semaphore() { ASSERT(wait_list.empty()); } @@ -187,7 +187,7 @@ s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { return sem->Wait(false, needCount, nullptr); } -int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32 *pNumWaitThreads) { +int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) { return sem->Cancel(setCount, pNumWaitThreads); } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 384d3167..4fcee07a 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -354,7 +354,8 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc2UnormBlock; } - if (data_format == AmdGpu::DataFormat::Format16_16 && num_format == AmdGpu::NumberFormat::Snorm) { + if (data_format == AmdGpu::DataFormat::Format16_16 && + num_format == AmdGpu::NumberFormat::Snorm) { return vk::Format::eR16G16Snorm; } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));