From dd91456b48f995b4bb9583c4345430b2d4f292eb Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 30 May 2024 18:07:36 +0300 Subject: [PATCH] video_core: Moar shader instruction --- src/core/libraries/kernel/libkernel.cpp | 1 + .../libraries/kernel/memory_management.cpp | 20 +++++++---- src/core/libraries/kernel/memory_management.h | 9 +++++ src/core/memory.cpp | 27 +++++++++++--- src/core/memory.h | 20 +++++++---- src/input/controller.h | 2 +- .../frontend/translate/translate.cpp | 24 +++++++++++++ .../frontend/translate/translate.h | 6 ++++ .../frontend/translate/vector_alu.cpp | 35 ++++++++++++++++++- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 11 files changed, 127 insertions(+), 20 deletions(-) diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index cad5dd97..a8c3975e 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -207,6 +207,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize); LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory); LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection); + LIB_FUNCTION("BHouLQzh0X0", "libkernel", 1, "libkernel", 1, 1, sceKernelDirectMemoryQuery); LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory); LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap); LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory); diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index 2e650273..9e540107 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -18,11 +18,6 @@ u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize() { int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len, u64 alignment, int memoryType, s64* physAddrOut) { - LOG_INFO(Kernel_Vmm, - "searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, alignment = {:#x}, memoryType = " - "{:#x}", - searchStart, searchEnd, len, alignment, memoryType); - if (searchStart < 0 || searchEnd <= searchStart) { LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!"); return SCE_KERNEL_ERROR_EINVAL; @@ -44,7 +39,12 @@ int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u auto* memory = Core::Memory::Instance(); PAddr phys_addr = memory->Allocate(searchStart, searchEnd, len, alignment, memoryType); *physAddrOut = static_cast(phys_addr); - LOG_INFO(Kernel_Vmm, "physAddrOut = {:#x}", phys_addr); + + LOG_INFO(Kernel_Vmm, + "searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, " + "alignment = {:#x}, memoryType = {:#x}, physAddrOut = {:#x}", + searchStart, searchEnd, len, alignment, memoryType, phys_addr); + return SCE_OK; } @@ -115,8 +115,16 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, } int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) { + LOG_WARNING(Kernel_Vmm, "called"); auto* memory = Core::Memory::Instance(); return memory->QueryProtection(std::bit_cast(addr), start, end, prot); } +int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, + size_t infoSize) { + LOG_WARNING(Kernel_Vmm, "called"); + auto* memory = Core::Memory::Instance(); + return memory->DirectMemoryQuery(offset, flags == 1, query_info); +} + } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h index c4bc338f..be0d8514 100644 --- a/src/core/libraries/kernel/memory_management.h +++ b/src/core/libraries/kernel/memory_management.h @@ -30,6 +30,12 @@ enum MemoryProtection : u32 { SCE_KERNEL_PROT_GPU_RW = 0x30 // Permit reads/writes from the GPU }; +struct OrbisQueryInfo { + uintptr_t start; + uintptr_t end; + int memoryType; +}; + u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize(); int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len, u64 alignment, int memoryType, s64* physAddrOut); @@ -41,4 +47,7 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int flags); int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot); +int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, + size_t infoSize); + } // namespace Libraries::Kernel diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 83758688..06fde132 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/scope_exit.h" #include "core/libraries/error_codes.h" +#include "core/libraries/kernel/memory_management.h" #include "core/memory.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -80,7 +81,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M if (True(flags & MemoryMapFlags::Fixed) && True(flags & MemoryMapFlags::NoOverwrite)) { // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. const auto& vma = FindVMA(mapped_addr)->second; - const u32 remaining_size = vma.base + vma.size - mapped_addr; + const size_t remaining_size = vma.base + vma.size - mapped_addr; ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); } @@ -131,7 +132,22 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr *start = reinterpret_cast(vma.base); *end = reinterpret_cast(vma.base + vma.size); *prot = static_cast(vma.prot); - return SCE_OK; + return ORBIS_OK; +} + +int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next, + Libraries::Kernel::OrbisQueryInfo* out_info) { + const auto it = std::ranges::find_if(allocations, [&](const DirectMemoryArea& alloc) { + return alloc.base <= addr && addr < alloc.base + alloc.size; + }); + if (it == allocations.end()) { + return SCE_KERNEL_ERROR_EACCES; + } + + out_info->start = it->base; + out_info->end = it->base + it->size; + out_info->memoryType = it->memory_type; + return ORBIS_OK; } std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { @@ -146,7 +162,8 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); const VirtualMemoryArea& vma = vma_handle->second; - ASSERT_MSG(vma.type == VMAType::Free, "Adding a mapping to already mapped region"); + ASSERT_MSG(vma.type == VMAType::Free && vma.base <= virtual_addr, + "Adding a mapping to already mapped region"); const VAddr start_in_vma = virtual_addr - vma.base; const VAddr end_in_vma = start_in_vma + size; @@ -164,7 +181,7 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { return vma_handle->second; } -MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u32 offset_in_vma) { +MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) { auto& old_vma = vma_handle->second; ASSERT(offset_in_vma < old_vma.size && offset_in_vma > 0); @@ -199,6 +216,7 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) { } void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { + return; const vk::Device device = instance->GetDevice(); const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties(); void* host_pointer = reinterpret_cast(addr); @@ -270,6 +288,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { } void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) { + return; const auto it = mapped_memories.find(addr); ASSERT(it != mapped_memories.end() && it->second.buffer_size == size); mapped_memories.erase(it); diff --git a/src/core/memory.h b/src/core/memory.h index 24e38df8..ab9006a4 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -17,6 +17,10 @@ namespace Vulkan { class Instance; } +namespace Libraries::Kernel { +struct OrbisQueryInfo; +} + namespace Core { enum class MemoryProt : u32 { @@ -77,12 +81,12 @@ struct VirtualMemoryArea { } }; -constexpr VAddr SYSTEM_RESERVED = 0x800000000u; -constexpr VAddr CODE_BASE_OFFSET = 0x100000000u; -constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000u; -constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFu; -constexpr VAddr USER_MIN = 0x1000000000u; -constexpr VAddr USER_MAX = 0xFBFFFFFFFFu; +constexpr VAddr SYSTEM_RESERVED = 0x800000000ULL; +constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; +constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000ULL; +constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; +constexpr VAddr USER_MIN = 0x1000000000ULL; +constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL; class MemoryManager { using VMAMap = std::map; @@ -109,6 +113,8 @@ public: int QueryProtection(VAddr addr, void** start, void** end, u32* prot); + int DirectMemoryQuery(PAddr addr, bool find_next, Libraries::Kernel::OrbisQueryInfo* out_info); + std::pair GetVulkanBuffer(VAddr addr); private: @@ -123,7 +129,7 @@ private: VirtualMemoryArea& AddMapping(VAddr virtual_addr, size_t size); - VMAHandle Split(VMAHandle vma_handle, u32 offset_in_vma); + VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma); VMAHandle MergeAdjacent(VMAHandle iter); diff --git a/src/input/controller.h b/src/input/controller.h index 4819e2d7..774bbca7 100644 --- a/src/input/controller.h +++ b/src/input/controller.h @@ -32,7 +32,7 @@ private: }; std::mutex m_mutex; - bool m_connected = false; + bool m_connected = true; State m_last_state; int m_connected_count = 0; u32 m_states_num = 0; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 6136b46a..62409152 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -111,6 +111,9 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { case OperandField::ConstFloatNeg_1_0: value = ir.Imm32(-1.0f); break; + case OperandField::ConstFloatNeg_2_0: + value = ir.Imm32(-2.0f); + break; case OperandField::VccLo: value = ir.GetVccLo(); break; @@ -327,9 +330,30 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_ANDN2_B64: translator.S_ANDN2_B64(inst); break; + case Opcode::V_SIN_F32: + translator.V_SIN_F32(inst); + break; + case Opcode::V_LOG_F32: + translator.V_LOG_F32(inst); + break; + case Opcode::V_EXP_F32: + translator.V_EXP_F32(inst); + break; + case Opcode::V_SQRT_F32: + translator.V_SQRT_F32(inst); + break; + case Opcode::V_MIN_F32: + translator.V_MIN_F32(inst); + break; + case Opcode::V_MIN3_F32: + translator.V_MIN3_F32(inst); + break; case Opcode::S_NOP: + case Opcode::S_AND_B64: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: + case Opcode::S_CBRANCH_SCC1: + case Opcode::S_BRANCH: case Opcode::S_MOV_B64: case Opcode::S_WQM_B64: case Opcode::V_INTERP_P1_F32: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 6e50e8fb..e721dad5 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -68,6 +68,12 @@ public: void V_CMP_F32(ConditionOp op, const GcnInst& inst); void V_MAX_F32(const GcnInst& inst); void V_RSQ_F32(const GcnInst& inst); + void V_SIN_F32(const GcnInst& inst); + void V_LOG_F32(const GcnInst& inst); + void V_EXP_F32(const GcnInst& inst); + void V_SQRT_F32(const GcnInst& inst); + void V_MIN_F32(const GcnInst& inst); + void V_MIN3_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 44394013..81366117 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -26,7 +26,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { void Translator::V_MUL_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); + ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); } void Translator::V_CMP_EQ_U32(const GcnInst& inst) { @@ -198,4 +198,37 @@ void Translator::V_RSQ_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); } +void Translator::V_SIN_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPSin(src0)); +} + +void Translator::V_LOG_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPLog2(src0)); +} + +void Translator::V_EXP_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPExp2(src0)); +} + +void Translator::V_SQRT_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPSqrt(src0)); +} + +void Translator::V_MIN_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src1{GetSrc(inst.src[1], true)}; + SetDst(inst.dst[0], ir.FPMin(src0, src1)); +} + +void Translator::V_MIN3_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src2{GetSrc(inst.src[2], true)}; + SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2))); +} + } // namespace Shader::Gcn diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 17ed225b..ddc67d8e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -42,7 +42,7 @@ struct GraphicsPipelineKey { std::array write_masks; bool operator==(const GraphicsPipelineKey& key) const noexcept { - return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0; + return std::memcmp(this, &key, sizeof(key)) == 0; } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1ddfa2fa..66ff9403 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -93,6 +93,7 @@ void PipelineCache::RefreshGraphicsKey() { key.stencil_ref_back = regs.stencil_ref_back; key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); + key.cull_mode = regs.polygon_control.CullingMode(); const auto& db = regs.depth_buffer; key.depth_format = key.depth.depth_enable