From 8dfa5782b2e7a607f297a4edbde084f1a7f6b4c5 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 26 May 2024 15:51:35 +0300 Subject: [PATCH] video_core: Add constant buffer support (#147) --- src/common/logging/backend.cpp | 1 + src/core/libraries/kernel/libkernel.cpp | 1 + .../libraries/kernel/memory_management.cpp | 5 + src/core/libraries/kernel/memory_management.h | 1 + src/core/memory.cpp | 13 +- src/core/memory.h | 2 + .../spirv/emit_spirv_context_get_set.cpp | 38 ++++++ .../backend/spirv/emit_spirv_instructions.h | 9 ++ .../backend/spirv/spirv_emit_context.cpp | 55 +++++++- .../backend/spirv/spirv_emit_context.h | 12 +- .../frontend/translate/translate.cpp | 5 +- .../frontend/translate/translate.h | 3 + .../frontend/translate/vector_memory.cpp | 31 +++++ src/shader_recompiler/ir/ir_emitter.cpp | 20 +++ src/shader_recompiler/ir/ir_emitter.h | 5 + src/shader_recompiler/ir/opcodes.inc | 7 + .../ir/passes/resource_tracking_pass.cpp | 122 ++++++++++++------ src/shader_recompiler/ir/reg.h | 11 ++ src/shader_recompiler/recompiler.cpp | 3 +- src/shader_recompiler/runtime_info.h | 32 ++++- src/video_core/amdgpu/resource.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 56 +++++++- .../renderer_vulkan/vk_graphics_pipeline.h | 6 +- .../renderer_vulkan/vk_instance.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 7 +- .../renderer_vulkan/vk_rasterizer.cpp | 4 +- 26 files changed, 395 insertions(+), 56 deletions(-) diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 0d75b331..0fd344b5 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -189,6 +189,7 @@ public: } else { ForEachBackend([&entry](auto& backend) { backend.Write(entry); }); } + std::fflush(stdout); } private: diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index 003e45e1..0f847f1a 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -204,6 +204,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory); LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize); LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory); + LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection); LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory); LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap); LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory); diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index a256bd9a..2e650273 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -114,4 +114,9 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, ""); } +int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) { + auto* memory = Core::Memory::Instance(); + return memory->QueryProtection(std::bit_cast(addr), start, end, prot); +} + } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h index 1df7b75f..c4bc338f 100644 --- a/src/core/libraries/kernel/memory_management.h +++ b/src/core/libraries/kernel/memory_management.h @@ -39,5 +39,6 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t l int flags, const char* name); s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, int flags); +int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot); } // namespace Libraries::Kernel diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a16abbe0..83758688 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -123,6 +123,17 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { impl.Unmap(virtual_addr, size); } +int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) { + const auto it = FindVMA(addr); + const auto& vma = it->second; + ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped"); + + *start = reinterpret_cast(vma.base); + *end = reinterpret_cast(vma.base + vma.size); + *prot = static_cast(vma.prot); + return SCE_OK; +} + std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { auto it = mapped_memories.upper_bound(addr); it = std::prev(it); @@ -243,7 +254,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { constexpr vk::BufferUsageFlags MapFlags = vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eUniformBuffer; + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; const vk::StructureChain buffer_info = { vk::BufferCreateInfo{ diff --git a/src/core/memory.h b/src/core/memory.h index 4c0fadbf..24e38df8 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -107,6 +107,8 @@ public: void UnmapMemory(VAddr virtual_addr, size_t size); + int QueryProtection(VAddr addr, void** start, void** end, u32* prot); + std::pair GetVulkanBuffer(VAddr addr); private: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 2ed4e29d..1acfebe8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -29,6 +29,10 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { } } // Anonymous namespace +void EmitGetUserData(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + void EmitGetScalarRegister(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -95,4 +99,38 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen ctx.OpStore(pointer, value); } +Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address) { + UNREACHABLE(); +} + +Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address) { + UNREACHABLE(); +} + +Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address) { + UNREACHABLE(); +} + +Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address) { + const auto info = inst->Flags(); + const Id buffer = ctx.buffers[handle.U32()]; + const Id type = ctx.info.buffers[handle.U32()].is_storage ? ctx.storage_f32 : ctx.uniform_f32; + if (info.index_enable && info.offset_enable) { + UNREACHABLE(); + } else if (info.index_enable) { + boost::container::static_vector ids; + for (u32 i = 0; i < 4; i++) { + const Id index{ctx.OpIAdd(ctx.U32[1], ctx.Def(address), ctx.ConstU32(i))}; + const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)}; + ids.push_back(ctx.OpLoad(ctx.F32[1], ptr)); + } + return ctx.OpCompositeConstruct(ctx.F32[4], ids); + } + UNREACHABLE(); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 24685275..0d23312d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -34,6 +34,7 @@ void EmitGetVcc(EmitContext& ctx); void EmitSetVcc(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); +void EmitGetUserData(EmitContext& ctx); void EmitGetScalarRegister(EmitContext& ctx); void EmitSetScalarRegister(EmitContext& ctx); void EmitGetVectorRegister(EmitContext& ctx); @@ -46,6 +47,14 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu const IR::Value& offset); Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, const IR::Value& offset); +Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address); +Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address); +Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address); +Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, + const IR::Value& address); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 771e46d4..a7d3725c 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -36,7 +36,8 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} { + : Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_}, + stage{program.info.stage} { u32& uniform_binding{bindings.unified}; u32& storage_binding{bindings.unified}; u32& texture_binding{bindings.unified}; @@ -44,6 +45,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(program); + DefineBuffers(program.info); } EmitContext::~EmitContext() = default; @@ -107,8 +109,8 @@ void EmitContext::DefineArithmeticTypes() { } void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program); - DefineOutputs(program); + DefineInputs(program.info); + DefineOutputs(program.info); } Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { @@ -164,8 +166,7 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { } } -void EmitContext::DefineInputs(const IR::Program& program) { - const auto& info = program.info; +void EmitContext::DefineInputs(const Info& info) { switch (stage) { case Stage::Vertex: vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); @@ -201,8 +202,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { } } -void EmitContext::DefineOutputs(const IR::Program& program) { - const auto& info = program.info; +void EmitContext::DefineOutputs(const Info& info) { switch (stage) { case Stage::Vertex: output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); @@ -234,4 +234,45 @@ void EmitContext::DefineOutputs(const IR::Program& program) { } } +void EmitContext::DefineBuffers(const Info& info) { + const auto define_buffer = [&](const BufferResource& buffer, Id type, u32 element_size, + char type_char, u32 index) { + ASSERT(buffer.stride % element_size == 0); + const u32 num_elements = buffer.stride * buffer.num_records / element_size; + const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))}; + Decorate(record_array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(record_array_type)}; + const auto name = + fmt::format("{}_cbuf_block_{}{}", stage, type_char, element_size * CHAR_BIT); + Name(struct_type, name); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const auto storage_class = + buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform; + const Id struct_pointer_type{TypePointer(storage_class, struct_type)}; + if (buffer.is_storage) { + storage_f32 = TypePointer(storage_class, type); + } else { + uniform_f32 = TypePointer(storage_class, type); + } + const Id id{AddGlobalVariable(struct_pointer_type, storage_class)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("c{}", index)); + + binding++; + buffers.push_back(id); + interfaces.push_back(id); + }; + + for (u32 i = 0; const auto& buffer : info.buffers) { + ASSERT(True(buffer.used_types & IR::Type::F32)); + define_buffer(buffer, F32[1], 4, 'f', i); + i++; + } +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 26298e38..1baf7fa1 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -114,6 +114,7 @@ public: return ConstantComposite(type, constituents); } + Info& info; const Profile& profile; Stage stage{}; @@ -141,6 +142,9 @@ public: Id output_u32{}; Id output_f32{}; + Id uniform_f32{}; + Id storage_f32{}; + boost::container::small_vector interfaces; Id output_position{}; @@ -148,6 +152,9 @@ public: Id base_vertex{}; std::array frag_color{}; + u32 binding{}; + boost::container::small_vector buffers; + struct SpirvAttribute { Id id; Id pointer_type; @@ -160,8 +167,9 @@ public: private: void DefineArithmeticTypes(); void DefineInterfaces(const IR::Program& program); - void DefineInputs(const IR::Program& program); - void DefineOutputs(const IR::Program& program); + void DefineInputs(const Info& info); + void DefineOutputs(const Info& info); + void DefineBuffers(const Info& info); SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); }; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 06faf28d..453bdcc2 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -36,7 +36,7 @@ Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*bloc // Initialize user data. IR::ScalarReg dst_sreg = IR::ScalarReg::S0; for (u32 i = 0; i < 16; i++) { - ir.SetScalarReg(dst_sreg++, ir.Imm32(0U)); + ir.SetScalarReg(dst_sreg++, ir.GetUserData(dst_sreg)); } } @@ -171,6 +171,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_CNDMASK_B32: translator.V_CNDMASK_B32(inst); break; + case Opcode::TBUFFER_LOAD_FORMAT_XYZW: + translator.TBUFFER_LOAD_FORMAT_XYZW(inst); + break; case Opcode::S_MOV_B64: case Opcode::S_WQM_B64: case Opcode::V_INTERP_P1_F32: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 7e0186f3..ca3166eb 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -48,6 +48,9 @@ public: void V_CMP_EQ_U32(const GcnInst& inst); void V_CNDMASK_B32(const GcnInst& inst); + // Vector Memory + void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst); + // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index ae82e3cc..d87e957a 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -100,4 +100,35 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { } } +void Translator::TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst) { + const auto& mtbuf = inst.control.mtbuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::ScalarReg sharp{inst.src[2].code * 4}; + const IR::Value address = [&] -> IR::Value { + if (mtbuf.idxen && mtbuf.offen) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); + } + if (mtbuf.idxen || mtbuf.offen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); + const IR::Value soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mtbuf.idxen); + info.offset_enable.Assign(mtbuf.offen); + info.inst_offset.Assign(mtbuf.offset); + info.dmft.Assign(static_cast(mtbuf.dfmt)); + info.nfmt.Assign(static_cast(mtbuf.nfmt)); + info.is_typed.Assign(1); + + const IR::Value value = ir.LoadBuffer(4, ir.GetScalarReg(sharp), address, info); + const IR::VectorReg dst_reg{inst.src[1].code}; + for (u32 i = 0; i < 4; i++) { + ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)}); + } +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 8bea18e0..26c9ce2a 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -111,6 +111,10 @@ void IREmitter::Epilogue() { Inst(Opcode::Epilogue); } +U32 IREmitter::GetUserData(IR::ScalarReg reg) { + return Inst(Opcode::GetUserData, reg); +} + template <> U32 IREmitter::GetScalarReg(IR::ScalarReg reg) { return Inst(Opcode::GetScalarRegister, reg); @@ -233,6 +237,22 @@ F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& return Inst(Opcode::ReadConstBufferF32, handle, index, offset); } +Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address, + BufferInstInfo info) { + switch (num_dwords) { + case 1: + return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address); + case 2: + return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address); + case 3: + return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address); + case 4: + return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address); + default: + throw InvalidArgument("Invalid number of dwords {}", num_dwords); + } +} + F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f6bc8807..a445f06a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -42,6 +42,8 @@ public: void Prologue(); void Epilogue(); + U32 GetUserData(IR::ScalarReg reg); + template [[nodiscard]] T GetScalarReg(IR::ScalarReg reg); template @@ -69,6 +71,9 @@ public: template [[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset); + [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, + BufferInstInfo info); + [[nodiscard]] U1 GetZeroFromOp(const Value& op); [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 59687707..929fac42 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -19,6 +19,7 @@ OPCODE(ReadConstBuffer, U32, Opaq OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 ) // Context getters/setters +OPCODE(GetUserData, U32, ScalarReg, ) OPCODE(GetScalarRegister, U32, ScalarReg, ) OPCODE(SetScalarRegister, Void, ScalarReg, U32, ) OPCODE(GetVectorRegister, U32, VectorReg, ) @@ -42,6 +43,12 @@ OPCODE(UndefU16, U16, OPCODE(UndefU32, U32, ) OPCODE(UndefU64, U64, ) +// Buffer operations +OPCODE(LoadBufferF32, F32, Opaque, Opaque, ) +OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, ) +OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, ) +OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 39f0b808..79de4680 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -4,28 +4,51 @@ #include #include #include - #include - #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" namespace Shader::Optimization { namespace { struct SharpLocation { - IR::ScalarReg eud_ptr; - u32 index_dwords; + u32 sgpr_base; + u32 dword_offset; auto operator<=>(const SharpLocation&) const = default; }; -bool IsResourceInstruction(const IR::Inst& inst) { +bool IsBufferInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { + case IR::Opcode::LoadBufferF32: + case IR::Opcode::LoadBufferF32x2: + case IR::Opcode::LoadBufferF32x3: + case IR::Opcode::LoadBufferF32x4: case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBufferF32: + return true; + default: + return false; + } +} + +IR::Type BufferLoadType(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadBufferF32: + case IR::Opcode::LoadBufferF32x2: + case IR::Opcode::LoadBufferF32x3: + case IR::Opcode::LoadBufferF32x4: + return IR::Type::F32; + default: + UNREACHABLE(); + } +} + +bool IsImageInstruction(const IR::Inst& inst) { + switch (inst.GetOpcode()) { case IR::Opcode::ImageSampleExplicitLod: case IR::Opcode::ImageSampleImplicitLod: case IR::Opcode::ImageSampleDrefExplicitLod: @@ -44,32 +67,26 @@ bool IsResourceInstruction(const IR::Inst& inst) { } } -/*class Descriptors { +class Descriptors { public: - explicit Descriptors(TextureDescriptors& texture_descriptors_) - : texture_descriptors{texture_descriptors_} {} + explicit Descriptors(BufferResourceList& buffer_resources_) + : buffer_resources{buffer_resources_} {} - u32 Add(const TextureDescriptor& desc) { - const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { - return desc.type == existing.type && desc.is_depth == existing.is_depth && - desc.has_secondary == existing.has_secondary && - desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset && - desc.shift_left == existing.shift_left && - desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && - desc.secondary_shift_left == existing.secondary_shift_left && - desc.count == existing.count && desc.size_shift == existing.size_shift; + u32 Add(const BufferResource& desc) { + const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { + return desc.sgpr_base == existing.sgpr_base && + desc.dword_offset == existing.dword_offset; })}; - // TODO: Read this from TIC - texture_descriptors[index].is_multisample |= desc.is_multisample; + auto& buffer = buffer_resources[index]; + ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records); + buffer.is_storage |= desc.is_storage; + buffer.used_types |= desc.used_types; return index; } private: template static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { - // TODO: Handle arrays const auto it{std::ranges::find_if(descriptors, pred)}; if (it != descriptors.end()) { return static_cast(std::distance(descriptors.begin(), it)); @@ -78,17 +95,16 @@ private: return static_cast(descriptors.size()) - 1; } - TextureDescriptors& texture_descriptors; -};*/ + BufferResourceList& buffer_resources; +}; } // Anonymous namespace -SharpLocation TrackSharp(const IR::Value& handle) { - IR::Inst* inst = handle.InstRecursive(); - if (inst->GetOpcode() == IR::Opcode::GetScalarRegister) { +SharpLocation TrackSharp(const IR::Inst* inst) { + if (inst->GetOpcode() == IR::Opcode::GetUserData) { return SharpLocation{ - .eud_ptr = IR::ScalarReg::Max, - .index_dwords = inst->Arg(0).U32(), + .sgpr_base = u32(IR::ScalarReg::Max), + .dword_offset = u32(inst->Arg(0).ScalarReg()), }; } ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory"); @@ -108,21 +124,55 @@ SharpLocation TrackSharp(const IR::Value& handle) { // Return retrieved location. return SharpLocation{ - .eud_ptr = base, - .index_dwords = dword_offset, + .sgpr_base = u32(base), + .dword_offset = dword_offset, }; } +void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, + Descriptors& descriptors) { + IR::Inst* producer = inst.Arg(0).InstRecursive(); + const auto sharp = TrackSharp(producer); + const auto buffer = info.ReadUd(sharp.sgpr_base, sharp.dword_offset); + const u32 binding = descriptors.Add(BufferResource{ + .sgpr_base = sharp.sgpr_base, + .dword_offset = sharp.dword_offset, + .stride = u32(buffer.stride), + .num_records = u32(buffer.num_records), + .used_types = BufferLoadType(inst), + .is_storage = buffer.base_address % 64 != 0, + }); + const auto inst_info = inst.Flags(); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + // Replace handle with binding index in buffer resource list. + inst.SetArg(0, ir.Imm32(binding)); + ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); + if (inst_info.is_typed) { + ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float && + inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32); + } + // Calculate buffer address. + const u32 dword_stride = buffer.stride / sizeof(u32); + const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32); + IR::U32 address = ir.Imm32(dword_offset); + if (inst_info.index_enable && inst_info.offset_enable) { + UNREACHABLE(); + } else if (inst_info.index_enable) { + const IR::U32 index{inst.Arg(1)}; + address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address); + } + inst.SetArg(1, address); +} + void ResourceTrackingPass(IR::Program& program) { + auto& info = program.info; + Descriptors descriptors{info.buffers}; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { - if (!IsResourceInstruction(inst)) { + if (IsBufferInstruction(inst)) { + PatchBufferInstruction(*block, inst, info, descriptors); continue; } - IR::Inst* producer = inst.Arg(0).InstRecursive(); - const auto loc = TrackSharp(producer->Arg(0)); - fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords, - loc.index_dwords + 4, loc.eud_ptr != IR::ScalarReg::Max); } } } diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 721d5356..89e78532 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -6,6 +6,7 @@ #include "common/bit_field.h" #include "common/types.h" #include "shader_recompiler/exception.h" +#include "video_core/amdgpu/pixel_format.h" namespace Shader::IR { @@ -41,6 +42,16 @@ union TextureInstInfo { BitField<25, 2, u32> num_derivatives; }; +union BufferInstInfo { + u32 raw; + BitField<0, 1, u32> index_enable; + BitField<1, 1, u32> offset_enable; + BitField<2, 12, u32> inst_offset; + BitField<14, 4, AmdGpu::DataFormat> dmft; + BitField<18, 3, AmdGpu::NumberFormat> nfmt; + BitField<21, 1, u32> is_typed; +}; + enum class ScalarReg : u32 { S0, S1, diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 66d19620..cc3a0eb2 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -62,15 +62,16 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool(const BufferResource&) const = default; +}; +using BufferResourceList = boost::container::static_vector; + struct Info { struct VsInput { AmdGpu::NumberFormat fmt; @@ -86,17 +100,31 @@ struct Info { AttributeFlags loads{}; AttributeFlags stores{}; + BufferResourceList buffers; std::span user_data; Stage stage; template T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { T data; - u32* base; - std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + const u32* base = user_data.data(); + if (ptr_index != IR::NumScalarRegs) { + std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + } std::memcpy(&data, base + dword_offset, sizeof(T)); return data; } }; } // namespace Shader + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::Stage& stage, format_context& ctx) const { + constexpr static std::array names = {"vs", "tc", "te", "gs", "fs", "cs"}; + return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); + } +}; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 3e496f5e..86fa0559 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -27,6 +27,7 @@ struct Buffer { BitField<15, 4, DataFormat> data_format; BitField<19, 2, u32> element_size; BitField<21, 2, u32> index_stride; + BitField<23, 1, u32> add_tid_enable; }; }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3db09efe..70e68a8f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/assert.h" @@ -25,9 +26,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul stages[i] = *infos[i]; } + desc_layout = BuildSetLayout(); + const vk::DescriptorSetLayout set_layout = *desc_layout; const vk::PipelineLayoutCreateInfo layout_info = { - .setLayoutCount = 0U, - .pSetLayouts = nullptr, + .setLayoutCount = 1U, + .pSetLayouts = &set_layout, .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }; @@ -196,10 +199,32 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul GraphicsPipeline::~GraphicsPipeline() = default; +vk::UniqueDescriptorSetLayout GraphicsPipeline::BuildSetLayout() const { + u32 binding{}; + boost::container::small_vector bindings; + for (const auto& stage : stages) { + for (const auto& buffer : stage.buffers) { + bindings.push_back({ + .binding = binding++, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + }); + } + } + const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = { + .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + return instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); +} + void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { std::array buffers; std::array offsets; + // Bind vertex buffer. const auto& vs_info = stages[0]; const size_t num_buffers = vs_info.vs_inputs.size(); for (u32 i = 0; i < num_buffers; ++i) { @@ -210,6 +235,33 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); + + // Bind resource buffers and textures. + boost::container::static_vector buffer_infos; + boost::container::small_vector set_writes; + u32 binding{}; + + for (const auto& stage : stages) { + for (const auto& buffer : stage.buffers) { + const auto vsharp = stage.ReadUd(buffer.sgpr_base, buffer.dword_offset); + const auto [vk_buffer, offset] = memory->GetVulkanBuffer(vsharp.base_address); + buffer_infos.push_back({ + .buffer = vk_buffer, + .offset = offset, + .range = vsharp.stride * vsharp.num_records, + }); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .pBufferInfo = &buffer_infos.back(), + }); + } + } + + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, set_writes); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 47cc5c23..75bd85ec 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -54,12 +54,16 @@ public: return *pipeline; } +private: + vk::UniqueDescriptorSetLayout BuildSetLayout() const; + private: const Instance& instance; Scheduler& scheduler; vk::UniquePipeline pipeline; vk::UniquePipelineLayout pipeline_layout; - std::array stages; + vk::UniqueDescriptorSetLayout desc_layout; + std::array stages{}; PipelineKey key; }; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 32dca0c5..7419461c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -150,6 +150,7 @@ bool Instance::CreateDevice() { tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME); + add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6de86c4c..d917f4e4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,7 +123,12 @@ std::unique_ptr PipelineCache::CreatePipeline() { std::move(info)); // Compile IR to SPIR-V - const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]); + const auto profile = Shader::Profile{.supported_spirv = 0x00010600U}; + const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i]); + std::ofstream file("shader0.spv", std::ios::out | std::ios::binary); + file.write((const char*)spv_code.data(), spv_code.size() * 4); + file.close(); + stages[i] = CompileSPV(spv_code, instance.GetDevice()); infos[i] = &programs[i].info; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3d301f62..a7483c27 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -61,7 +61,7 @@ void Rasterizer::Draw(bool is_indexed) { if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { - cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), 0, 0); } cmdbuf.endRendering(); } @@ -85,7 +85,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { return index_size / sizeof(u16); } if (!is_indexed) { - return 0; + return regs.num_indices; } const VAddr index_address = regs.index_base_address.Address();