video_core: Add constant buffer support (#147)

This commit is contained in:
TheTurtle 2024-05-26 15:51:35 +03:00 committed by GitHub
parent 3c90b8ac00
commit 8dfa5782b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 395 additions and 56 deletions

View File

@ -189,6 +189,7 @@ public:
} else { } else {
ForEachBackend([&entry](auto& backend) { backend.Write(entry); }); ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
} }
std::fflush(stdout);
} }
private: private:

View File

@ -204,6 +204,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory); LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory);
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize); LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory); LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory);
LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection);
LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory); LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap); LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory); LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);

View File

@ -114,4 +114,9 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, ""); return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "");
} }
// Guest-facing entry point: forwards the query to the memory manager returned by
// Core::Memory::Instance() and returns its result code unchanged.
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
    // The manager works on integral virtual addresses, so reinterpret the pointer bits.
    const VAddr query_addr = std::bit_cast<VAddr>(addr);
    return Core::Memory::Instance()->QueryProtection(query_addr, start, end, prot);
}
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

View File

@ -39,5 +39,6 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t l
int flags, const char* name); int flags, const char* name);
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
int flags); int flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

View File

@ -123,6 +123,17 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
impl.Unmap(virtual_addr, size); impl.Unmap(virtual_addr, size);
} }
/// Reports the range and protection of the virtual memory area that contains `addr`.
///
/// @param addr  Virtual address to look up.
/// @param start Receives the base address of the area; may be null to skip.
/// @param end   Receives the address one past the end of the area; may be null to skip.
/// @param prot  Receives the protection flags of the area; may be null to skip.
/// @return SCE_OK. Asserts if the area found for `addr` is of type Free (not mapped).
int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) {
    const auto it = FindVMA(addr);
    const auto& vma = it->second;
    ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped");

    // The output pointers come straight from the guest; do not write through a null one.
    if (start != nullptr) {
        *start = reinterpret_cast<void*>(vma.base);
    }
    if (end != nullptr) {
        *end = reinterpret_cast<void*>(vma.base + vma.size);
    }
    if (prot != nullptr) {
        *prot = static_cast<u32>(vma.prot);
    }
    return SCE_OK;
}
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) { std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
auto it = mapped_memories.upper_bound(addr); auto it = mapped_memories.upper_bound(addr);
it = std::prev(it); it = std::prev(it);
@ -243,7 +254,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
constexpr vk::BufferUsageFlags MapFlags = constexpr vk::BufferUsageFlags MapFlags =
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eUniformBuffer; vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
const vk::StructureChain buffer_info = { const vk::StructureChain buffer_info = {
vk::BufferCreateInfo{ vk::BufferCreateInfo{

View File

@ -107,6 +107,8 @@ public:
void UnmapMemory(VAddr virtual_addr, size_t size); void UnmapMemory(VAddr virtual_addr, size_t size);
int QueryProtection(VAddr addr, void** start, void** end, u32* prot);
std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr); std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
private: private:

View File

@ -29,6 +29,10 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
} }
} // Anonymous namespace } // Anonymous namespace
// SPIR-V emitter entry for the GetUserData IR opcode. It never produces code: it always
// throws LogicError, the same way EmitGetScalarRegister does.
// NOTE(review): presumably GetUserData is expected to be resolved by an earlier pass
// before emission - confirm.
void EmitGetUserData(EmitContext&) {
throw LogicError("Unreachable instruction");
}
void EmitGetScalarRegister(EmitContext&) { void EmitGetScalarRegister(EmitContext&) {
throw LogicError("Unreachable instruction"); throw LogicError("Unreachable instruction");
} }
@ -95,4 +99,38 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
ctx.OpStore(pointer, value); ctx.OpStore(pointer, value);
} }
// Emitter for the single-dword buffer load (LoadBufferF32). Not implemented: reaching it
// triggers UNREACHABLE().
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
const IR::Value& address) {
UNREACHABLE();
}
// Emitter for the two-dword buffer load (LoadBufferF32x2). Not implemented: reaching it
// triggers UNREACHABLE().
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
const IR::Value& address) {
UNREACHABLE();
}
// Emitter for the three-dword buffer load (LoadBufferF32x3). Not implemented: reaching it
// triggers UNREACHABLE().
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
const IR::Value& address) {
UNREACHABLE();
}
// Emits a four-component float load from the buffer bound at index `handle`.
// `address` is used as the index of the first element; the three following elements are
// read as well and packed into an F32[4] composite.
// Only the "index enabled, offset disabled" addressing mode is implemented; every other
// combination hits UNREACHABLE().
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
                       const IR::Value& address) {
    const auto flags = inst->Flags<IR::BufferInstInfo>();
    const Id buffer = ctx.buffers[handle.U32()];
    // Pick the element pointer type matching the storage class the buffer was declared with.
    const Id type = ctx.info.buffers[handle.U32()].is_storage ? ctx.storage_f32 : ctx.uniform_f32;

    const bool has_index = static_cast<bool>(flags.index_enable);
    const bool has_offset = static_cast<bool>(flags.offset_enable);
    if (!has_index || has_offset) {
        UNREACHABLE();
    }

    boost::container::static_vector<Id, 4> components;
    for (u32 component = 0; component != 4; ++component) {
        const Id element_index{ctx.OpIAdd(ctx.U32[1], ctx.Def(address), ctx.ConstU32(component))};
        // Member 0 of the block struct is the data array.
        const Id element_ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), element_index)};
        components.push_back(ctx.OpLoad(ctx.F32[1], element_ptr));
    }
    return ctx.OpCompositeConstruct(ctx.F32[4], components);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -34,6 +34,7 @@ void EmitGetVcc(EmitContext& ctx);
void EmitSetVcc(EmitContext& ctx); void EmitSetVcc(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx); void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx);
void EmitGetUserData(EmitContext& ctx);
void EmitGetScalarRegister(EmitContext& ctx); void EmitGetScalarRegister(EmitContext& ctx);
void EmitSetScalarRegister(EmitContext& ctx); void EmitSetScalarRegister(EmitContext& ctx);
void EmitGetVectorRegister(EmitContext& ctx); void EmitGetVectorRegister(EmitContext& ctx);
@ -46,6 +47,14 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu
const IR::Value& offset); const IR::Value& offset);
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
const IR::Value& offset); const IR::Value& offset);
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
const IR::Value& address);
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
const IR::Value& address);
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
const IR::Value& address);
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
const IR::Value& address);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);

View File

@ -36,7 +36,8 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace } // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} { : Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_},
stage{program.info.stage} {
u32& uniform_binding{bindings.unified}; u32& uniform_binding{bindings.unified};
u32& storage_binding{bindings.unified}; u32& storage_binding{bindings.unified};
u32& texture_binding{bindings.unified}; u32& texture_binding{bindings.unified};
@ -44,6 +45,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings
AddCapability(spv::Capability::Shader); AddCapability(spv::Capability::Shader);
DefineArithmeticTypes(); DefineArithmeticTypes();
DefineInterfaces(program); DefineInterfaces(program);
DefineBuffers(program.info);
} }
EmitContext::~EmitContext() = default; EmitContext::~EmitContext() = default;
@ -107,8 +109,8 @@ void EmitContext::DefineArithmeticTypes() {
} }
void EmitContext::DefineInterfaces(const IR::Program& program) { void EmitContext::DefineInterfaces(const IR::Program& program) {
DefineInputs(program); DefineInputs(program.info);
DefineOutputs(program); DefineOutputs(program.info);
} }
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
@ -164,8 +166,7 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
} }
} }
void EmitContext::DefineInputs(const IR::Program& program) { void EmitContext::DefineInputs(const Info& info) {
const auto& info = program.info;
switch (stage) { switch (stage) {
case Stage::Vertex: case Stage::Vertex:
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
@ -201,8 +202,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
} }
} }
void EmitContext::DefineOutputs(const IR::Program& program) { void EmitContext::DefineOutputs(const Info& info) {
const auto& info = program.info;
switch (stage) { switch (stage) {
case Stage::Vertex: case Stage::Vertex:
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
@ -234,4 +234,45 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
} }
} }
// Declares one SPIR-V global variable per entry of `info.buffers`.
// Each buffer becomes a Block-decorated struct with a single member "data": an array of
// `stride * num_records / element_size` elements. The variable uses the StorageBuffer
// class when the resource is flagged `is_storage`, Uniform otherwise, and is bound to
// descriptor set 0 at consecutive binding slots taken from the `binding` counter.
// The resulting ids are appended to `buffers` and `interfaces`.
void EmitContext::DefineBuffers(const Info& info) {
    // buffer       - resource description (stride, record count, storage flag)
    // type         - SPIR-V id of the array element type
    // element_size - size of one element in bytes
    // type_char    - letter used in the debug name of the block type
    // index        - position of the buffer in info.buffers, used in the variable name
    const auto define_buffer = [&](const BufferResource& buffer, Id type, u32 element_size,
                                   char type_char, u32 index) {
        ASSERT(buffer.stride % element_size == 0);
        const u32 num_elements = buffer.stride * buffer.num_records / element_size;
        // Build the array from `type` so it always agrees with the element pointer type
        // created below (the array element was previously hard-coded to F32[1]).
        const Id record_array_type{TypeArray(type, ConstU32(num_elements))};
        Decorate(record_array_type, spv::Decoration::ArrayStride, element_size);

        const Id struct_type{TypeStruct(record_array_type)};
        const auto name =
            fmt::format("{}_cbuf_block_{}{}", stage, type_char, element_size * CHAR_BIT);
        Name(struct_type, name);
        Decorate(struct_type, spv::Decoration::Block);
        MemberName(struct_type, 0, "data");
        MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);

        const auto storage_class =
            buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
        const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
        // Remember the element pointer type for the load emitters; one slot per class.
        if (buffer.is_storage) {
            storage_f32 = TypePointer(storage_class, type);
        } else {
            uniform_f32 = TypePointer(storage_class, type);
        }

        const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
        Decorate(id, spv::Decoration::Binding, binding);
        Decorate(id, spv::Decoration::DescriptorSet, 0U);
        Name(id, fmt::format("c{}", index));

        binding++;
        buffers.push_back(id);
        interfaces.push_back(id);
    };

    for (u32 i = 0; const auto& buffer : info.buffers) {
        // Only float access is supported for now.
        ASSERT(True(buffer.used_types & IR::Type::F32));
        define_buffer(buffer, F32[1], 4, 'f', i);
        i++;
    }
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -114,6 +114,7 @@ public:
return ConstantComposite(type, constituents); return ConstantComposite(type, constituents);
} }
Info& info;
const Profile& profile; const Profile& profile;
Stage stage{}; Stage stage{};
@ -141,6 +142,9 @@ public:
Id output_u32{}; Id output_u32{};
Id output_f32{}; Id output_f32{};
Id uniform_f32{};
Id storage_f32{};
boost::container::small_vector<Id, 16> interfaces; boost::container::small_vector<Id, 16> interfaces;
Id output_position{}; Id output_position{};
@ -148,6 +152,9 @@ public:
Id base_vertex{}; Id base_vertex{};
std::array<Id, 8> frag_color{}; std::array<Id, 8> frag_color{};
u32 binding{};
boost::container::small_vector<Id, 4> buffers;
struct SpirvAttribute { struct SpirvAttribute {
Id id; Id id;
Id pointer_type; Id pointer_type;
@ -160,8 +167,9 @@ public:
private: private:
void DefineArithmeticTypes(); void DefineArithmeticTypes();
void DefineInterfaces(const IR::Program& program); void DefineInterfaces(const IR::Program& program);
void DefineInputs(const IR::Program& program); void DefineInputs(const Info& info);
void DefineOutputs(const IR::Program& program); void DefineOutputs(const Info& info);
void DefineBuffers(const Info& info);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
}; };

View File

@ -36,7 +36,7 @@ Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*bloc
// Initialize user data. // Initialize user data.
IR::ScalarReg dst_sreg = IR::ScalarReg::S0; IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
for (u32 i = 0; i < 16; i++) {
    // Read the register and advance it in separate statements. Function arguments are
    // indeterminately sequenced, so `SetScalarReg(dst_sreg++, GetUserData(dst_sreg))`
    // could load user data slot i + 1 into register i depending on the compiler.
    ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg));
    dst_sreg++;
}
} }
@ -171,6 +171,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CNDMASK_B32: case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst); translator.V_CNDMASK_B32(inst);
break; break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
translator.TBUFFER_LOAD_FORMAT_XYZW(inst);
break;
case Opcode::S_MOV_B64: case Opcode::S_MOV_B64:
case Opcode::S_WQM_B64: case Opcode::S_WQM_B64:
case Opcode::V_INTERP_P1_F32: case Opcode::V_INTERP_P1_F32:

View File

@ -48,6 +48,9 @@ public:
void V_CMP_EQ_U32(const GcnInst& inst); void V_CMP_EQ_U32(const GcnInst& inst);
void V_CNDMASK_B32(const GcnInst& inst); void V_CNDMASK_B32(const GcnInst& inst);
// Vector Memory
void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);
// Vector interpolation // Vector interpolation
void V_INTERP_P2_F32(const GcnInst& inst); void V_INTERP_P2_F32(const GcnInst& inst);

View File

@ -100,4 +100,35 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
} }
} }
// Translates TBUFFER_LOAD_FORMAT_XYZW into a four-dword IR buffer load.
// The buffer descriptor is taken from the scalar register `src[2].code * 4`, the address
// operand(s) from the vector register(s) starting at `src[0]`, and the four loaded
// components are written to the vector registers starting at `src[1]`.
// The instruction's index/offset enables, immediate offset and data/number formats are
// forwarded to later passes through IR::BufferInstInfo flags.
void Translator::TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst) {
    const auto& mtbuf = inst.control.mtbuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};
    // Explicit `()` before the trailing return type: omitting the parameter list there is
    // only well-formed from C++23 onwards.
    const IR::Value address = [&]() -> IR::Value {
        if (mtbuf.idxen && mtbuf.offen) {
            // Both enabled: two consecutive vector registers carry the address operands.
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mtbuf.idxen || mtbuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        // Neither enabled: no register operand, pass an empty value.
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    // Only an immediate scalar offset of zero is handled.
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mtbuf.idxen);
    info.offset_enable.Assign(mtbuf.offen);
    info.inst_offset.Assign(mtbuf.offset);
    info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
    info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
    info.is_typed.Assign(1);

    const IR::Value value = ir.LoadBuffer(4, ir.GetScalarReg(sharp), address, info);
    const IR::VectorReg dst_reg{inst.src[1].code};
    for (u32 i = 0; i < 4; i++) {
        ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
    }
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -111,6 +111,10 @@ void IREmitter::Epilogue() {
Inst(Opcode::Epilogue); Inst(Opcode::Epilogue);
} }
// Emits a GetUserData instruction for `reg` and returns its U32 result.
U32 IREmitter::GetUserData(IR::ScalarReg reg) {
    const U32 user_data = Inst<U32>(Opcode::GetUserData, reg);
    return user_data;
}
template <> template <>
U32 IREmitter::GetScalarReg(IR::ScalarReg reg) { U32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
return Inst<U32>(Opcode::GetScalarRegister, reg); return Inst<U32>(Opcode::GetScalarRegister, reg);
@ -233,6 +237,22 @@ F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32&
return Inst<F32>(Opcode::ReadConstBufferF32, handle, index, offset); return Inst<F32>(Opcode::ReadConstBufferF32, handle, index, offset);
} }
// Emits a LoadBufferF32 / F32x2 / F32x3 / F32x4 instruction depending on `num_dwords`
// (1 to 4), attaching `info` as the instruction flags.
// Throws InvalidArgument for any other dword count.
Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
                            BufferInstInfo info) {
    if (num_dwords < 1 || num_dwords > 4) {
        throw InvalidArgument("Invalid number of dwords {}", num_dwords);
    }
    // Indexed by num_dwords - 1.
    constexpr Opcode load_opcodes[] = {
        Opcode::LoadBufferF32,
        Opcode::LoadBufferF32x2,
        Opcode::LoadBufferF32x3,
        Opcode::LoadBufferF32x4,
    };
    return Inst(load_opcodes[num_dwords - 1], Flags{info}, handle, address);
}
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
if (a.Type() != b.Type()) { if (a.Type() != b.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());

View File

@ -42,6 +42,8 @@ public:
void Prologue(); void Prologue();
void Epilogue(); void Epilogue();
U32 GetUserData(IR::ScalarReg reg);
template <typename T = U32> template <typename T = U32>
[[nodiscard]] T GetScalarReg(IR::ScalarReg reg); [[nodiscard]] T GetScalarReg(IR::ScalarReg reg);
template <typename T = U32> template <typename T = U32>
@ -69,6 +71,9 @@ public:
template <typename T = U32> template <typename T = U32>
[[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset); [[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset);
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
BufferInstInfo info);
[[nodiscard]] U1 GetZeroFromOp(const Value& op); [[nodiscard]] U1 GetZeroFromOp(const Value& op);
[[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetSignFromOp(const Value& op);
[[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op);

View File

@ -19,6 +19,7 @@ OPCODE(ReadConstBuffer, U32, Opaq
OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 ) OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 )
// Context getters/setters // Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, )
OPCODE(GetScalarRegister, U32, ScalarReg, ) OPCODE(GetScalarRegister, U32, ScalarReg, )
OPCODE(SetScalarRegister, Void, ScalarReg, U32, ) OPCODE(SetScalarRegister, Void, ScalarReg, U32, )
OPCODE(GetVectorRegister, U32, VectorReg, ) OPCODE(GetVectorRegister, U32, VectorReg, )
@ -42,6 +43,12 @@ OPCODE(UndefU16, U16,
OPCODE(UndefU32, U32, ) OPCODE(UndefU32, U32, )
OPCODE(UndefU64, U64, ) OPCODE(UndefU64, U64, )
// Buffer operations
OPCODE(LoadBufferF32, F32, Opaque, Opaque, )
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
// Vector utility // Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )

View File

@ -4,28 +4,51 @@
#include <algorithm> #include <algorithm>
#include <bit> #include <bit>
#include <optional> #include <optional>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Optimization { namespace Shader::Optimization {
namespace { namespace {
struct SharpLocation { struct SharpLocation {
IR::ScalarReg eud_ptr; u32 sgpr_base;
u32 index_dwords; u32 dword_offset;
auto operator<=>(const SharpLocation&) const = default; auto operator<=>(const SharpLocation&) const = default;
}; };
bool IsResourceInstruction(const IR::Inst& inst) { bool IsBufferInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32:
case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBuffer:
case IR::Opcode::ReadConstBufferF32: case IR::Opcode::ReadConstBufferF32:
return true;
default:
return false;
}
}
// Returns the element type accessed by a buffer load instruction.
// All four LoadBufferF32* opcodes yield IR::Type::F32; any other opcode is UNREACHABLE().
// NOTE(review): IsBufferInstruction also accepts ReadConstBuffer / ReadConstBufferF32,
// which are not handled here - confirm those opcodes can never reach this function.
IR::Type BufferLoadType(const IR::Inst& inst) {
    const IR::Opcode opcode = inst.GetOpcode();
    const bool is_f32_load =
        opcode == IR::Opcode::LoadBufferF32 || opcode == IR::Opcode::LoadBufferF32x2 ||
        opcode == IR::Opcode::LoadBufferF32x3 || opcode == IR::Opcode::LoadBufferF32x4;
    if (!is_f32_load) {
        UNREACHABLE();
    }
    return IR::Type::F32;
}
bool IsImageInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageSampleExplicitLod: case IR::Opcode::ImageSampleExplicitLod:
case IR::Opcode::ImageSampleImplicitLod: case IR::Opcode::ImageSampleImplicitLod:
case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageSampleDrefExplicitLod:
@ -44,32 +67,26 @@ bool IsResourceInstruction(const IR::Inst& inst) {
} }
} }
/*class Descriptors { class Descriptors {
public: public:
explicit Descriptors(TextureDescriptors& texture_descriptors_) explicit Descriptors(BufferResourceList& buffer_resources_)
: texture_descriptors{texture_descriptors_} {} : buffer_resources{buffer_resources_} {}
u32 Add(const TextureDescriptor& desc) { u32 Add(const BufferResource& desc) {
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth && return desc.sgpr_base == existing.sgpr_base &&
desc.has_secondary == existing.has_secondary && desc.dword_offset == existing.dword_offset;
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
desc.shift_left == existing.shift_left &&
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
desc.secondary_shift_left == existing.secondary_shift_left &&
desc.count == existing.count && desc.size_shift == existing.size_shift;
})}; })};
// TODO: Read this from TIC auto& buffer = buffer_resources[index];
texture_descriptors[index].is_multisample |= desc.is_multisample; ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
buffer.is_storage |= desc.is_storage;
buffer.used_types |= desc.used_types;
return index; return index;
} }
private: private:
template <typename Descriptors, typename Descriptor, typename Func> template <typename Descriptors, typename Descriptor, typename Func>
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
// TODO: Handle arrays
const auto it{std::ranges::find_if(descriptors, pred)}; const auto it{std::ranges::find_if(descriptors, pred)};
if (it != descriptors.end()) { if (it != descriptors.end()) {
return static_cast<u32>(std::distance(descriptors.begin(), it)); return static_cast<u32>(std::distance(descriptors.begin(), it));
@ -78,17 +95,16 @@ private:
return static_cast<u32>(descriptors.size()) - 1; return static_cast<u32>(descriptors.size()) - 1;
} }
TextureDescriptors& texture_descriptors; BufferResourceList& buffer_resources;
};*/ };
} // Anonymous namespace } // Anonymous namespace
SharpLocation TrackSharp(const IR::Value& handle) { SharpLocation TrackSharp(const IR::Inst* inst) {
IR::Inst* inst = handle.InstRecursive(); if (inst->GetOpcode() == IR::Opcode::GetUserData) {
if (inst->GetOpcode() == IR::Opcode::GetScalarRegister) {
return SharpLocation{ return SharpLocation{
.eud_ptr = IR::ScalarReg::Max, .sgpr_base = u32(IR::ScalarReg::Max),
.index_dwords = inst->Arg(0).U32(), .dword_offset = u32(inst->Arg(0).ScalarReg()),
}; };
} }
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory"); ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
@ -108,21 +124,55 @@ SharpLocation TrackSharp(const IR::Value& handle) {
// Return retrieved location. // Return retrieved location.
return SharpLocation{ return SharpLocation{
.eud_ptr = base, .sgpr_base = u32(base),
.index_dwords = dword_offset, .dword_offset = dword_offset,
}; };
} }
// Rewrites one buffer instruction so it no longer refers to a raw descriptor:
//  1. tracks where the descriptor operand (argument 0) comes from,
//  2. reads the AmdGpu::Buffer descriptor from user data at that location,
//  3. registers it in `descriptors` and replaces argument 0 with the resulting index,
//  4. replaces argument 1 with an address expressed in dwords.
// NOTE(review): `used_types` comes from BufferLoadType, which only handles the
// LoadBufferF32* opcodes, while IsBufferInstruction also lets ReadConstBuffer* through -
// confirm those cannot reach this function.
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) {
IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto sharp = TrackSharp(producer);
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
const u32 binding = descriptors.Add(BufferResource{
.sgpr_base = sharp.sgpr_base,
.dword_offset = sharp.dword_offset,
.stride = u32(buffer.stride),
.num_records = u32(buffer.num_records),
.used_types = BufferLoadType(inst),
// NOTE(review): a base address that is not 64-byte aligned selects a storage buffer;
// presumably this mirrors a uniform-buffer alignment limit - confirm.
.is_storage = buffer.base_address % 64 != 0,
});
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
// New instructions created through `ir` are inserted at the position of `inst`.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
// Replace handle with binding index in buffer resource list.
inst.SetArg(0, ir.Imm32(binding));
// Swizzled and thread-id-addressed buffers are not supported.
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
if (inst_info.is_typed) {
// Typed loads are only supported for four 32-bit float components.
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
}
// Calculate buffer address.
const u32 dword_stride = buffer.stride / sizeof(u32);
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
// Start from the immediate offset encoded in the instruction.
IR::U32 address = ir.Imm32(dword_offset);
if (inst_info.index_enable && inst_info.offset_enable) {
UNREACHABLE();
} else if (inst_info.index_enable) {
// address = index * stride + immediate offset, all in dwords.
const IR::U32 index{inst.Arg(1)};
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
}
// NOTE(review): when only offset_enable is set, the original argument 1 is dropped and
// the address is just the immediate offset - looks unhandled, confirm.
inst.SetArg(1, address);
}
void ResourceTrackingPass(IR::Program& program) { void ResourceTrackingPass(IR::Program& program) {
auto& info = program.info;
Descriptors descriptors{info.buffers};
for (IR::Block* const block : program.post_order_blocks) { for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
if (!IsResourceInstruction(inst)) { if (IsBufferInstruction(inst)) {
PatchBufferInstruction(*block, inst, info, descriptors);
continue; continue;
} }
IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto loc = TrackSharp(producer->Arg(0));
fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,
loc.index_dwords + 4, loc.eud_ptr != IR::ScalarReg::Max);
} }
} }
} }

View File

@ -6,6 +6,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "video_core/amdgpu/pixel_format.h"
namespace Shader::IR { namespace Shader::IR {
@ -41,6 +42,16 @@ union TextureInstInfo {
BitField<25, 2, u32> num_derivatives; BitField<25, 2, u32> num_derivatives;
}; };
// Flags attached to buffer load instructions, packed into one 32-bit word.
union BufferInstInfo {
// Whole word, for storing/loading the flags as a single u32.
u32 raw;
// Bit 0: the instruction supplies an index operand.
BitField<0, 1, u32> index_enable;
// Bit 1: the instruction supplies an offset operand.
BitField<1, 1, u32> offset_enable;
// Bits 2-13: immediate offset encoded in the instruction.
BitField<2, 12, u32> inst_offset;
// Bits 14-17: data format (field name is spelled "dmft" and used as such elsewhere).
BitField<14, 4, AmdGpu::DataFormat> dmft;
// Bits 18-20: number format.
BitField<18, 3, AmdGpu::NumberFormat> nfmt;
// Bit 21: set for typed buffer loads.
BitField<21, 1, u32> is_typed;
};
enum class ScalarReg : u32 { enum class ScalarReg : u32 {
S0, S0,
S1, S1,

View File

@ -62,15 +62,16 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
// Run optimization passes // Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::DeadCodeEliminationPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program.blocks);
Shader::Optimization::CollectShaderInfoPass(program); Shader::Optimization::CollectShaderInfoPass(program);
for (const auto& block : program.blocks) { for (const auto& block : program.blocks) {
fmt::print("{}\n", IR::DumpBlock(*block)); fmt::print("{}\n", IR::DumpBlock(*block));
} }
std::fflush(stdout);
return program; return program;
} }

View File

@ -8,6 +8,8 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
#include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/pixel_format.h"
namespace Shader { namespace Shader {
@ -39,6 +41,18 @@ enum class TextureType : u32 {
}; };
constexpr u32 NUM_TEXTURE_TYPES = 7; constexpr u32 NUM_TEXTURE_TYPES = 7;
// Description of one buffer used by a shader, as collected by the resource tracking pass.
struct BufferResource {
// Location of the buffer descriptor: base scalar register and offset in dwords.
u32 sgpr_base;
u32 dword_offset;
// Stride and record count copied from the descriptor.
u32 stride;
u32 num_records;
// Element types the shader accesses the buffer with.
IR::Type used_types;
// True when the buffer is bound as a storage buffer rather than a uniform buffer.
bool is_storage;
auto operator<=>(const BufferResource&) const = default;
};
using BufferResourceList = boost::container::static_vector<BufferResource, 8>;
struct Info { struct Info {
struct VsInput { struct VsInput {
AmdGpu::NumberFormat fmt; AmdGpu::NumberFormat fmt;
@ -86,17 +100,31 @@ struct Info {
AttributeFlags loads{}; AttributeFlags loads{};
AttributeFlags stores{}; AttributeFlags stores{};
BufferResourceList buffers;
std::span<const u32> user_data; std::span<const u32> user_data;
Stage stage; Stage stage;
template <typename T> template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
T data; T data;
u32* base; const u32* base = user_data.data();
if (ptr_index != IR::NumScalarRegs) {
std::memcpy(&base, &user_data[ptr_index], sizeof(base)); std::memcpy(&base, &user_data[ptr_index], sizeof(base));
}
std::memcpy(&data, base + dword_offset, sizeof(T)); std::memcpy(&data, base + dword_offset, sizeof(T));
return data; return data;
} }
}; };
} // namespace Shader } // namespace Shader
// fmt support for Shader::Stage: prints the two-letter stage abbreviation.
template <>
struct fmt::formatter<Shader::Stage> {
    // No format specifiers are accepted; parsing consumes nothing.
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }

    auto format(const Shader::Stage& stage, format_context& ctx) const {
        // Indexed by the numeric value of the stage.
        static constexpr const char* stage_names[] = {"vs", "tc", "te", "gs", "fs", "cs"};
        const auto stage_index = static_cast<size_t>(stage);
        return fmt::format_to(ctx.out(), "{}", stage_names[stage_index]);
    }
};

View File

@ -27,6 +27,7 @@ struct Buffer {
BitField<15, 4, DataFormat> data_format; BitField<15, 4, DataFormat> data_format;
BitField<19, 2, u32> element_size; BitField<19, 2, u32> element_size;
BitField<21, 2, u32> index_stride; BitField<21, 2, u32> index_stride;
BitField<23, 1, u32> add_tid_enable;
}; };
}; };

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
@ -25,9 +26,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
stages[i] = *infos[i]; stages[i] = *infos[i];
} }
desc_layout = BuildSetLayout();
const vk::DescriptorSetLayout set_layout = *desc_layout;
const vk::PipelineLayoutCreateInfo layout_info = { const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U, .setLayoutCount = 1U,
.pSetLayouts = nullptr, .pSetLayouts = &set_layout,
.pushConstantRangeCount = 0, .pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr, .pPushConstantRanges = nullptr,
}; };
@ -196,10 +199,32 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
GraphicsPipeline::~GraphicsPipeline() = default; GraphicsPipeline::~GraphicsPipeline() = default;
// Creates the push-descriptor set layout used by this pipeline.
// One storage-buffer binding is emitted for every buffer of every stage in `stages`; binding
// indices are assigned consecutively from zero, in stage order and then in buffer order.
vk::UniqueDescriptorSetLayout GraphicsPipeline::BuildSetLayout() const {
    boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> layout_bindings;

    for (const auto& info : stages) {
        // Bindings of this stage continue numbering where the previous stage stopped.
        const u32 first_slot = static_cast<u32>(layout_bindings.size());
        const u32 slot_count = static_cast<u32>(info.buffers.size());
        for (u32 slot = first_slot; slot < first_slot + slot_count; ++slot) {
            const vk::DescriptorSetLayoutBinding layout_binding = {
                .binding = slot,
                .descriptorType = vk::DescriptorType::eStorageBuffer,
                .descriptorCount = 1,
                // Every binding is made visible to both the vertex and the fragment stage.
                .stageFlags =
                    vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
            };
            layout_bindings.push_back(layout_binding);
        }
    }

    // The layout is flagged for push descriptors, so no descriptor sets are allocated from it.
    const vk::DescriptorSetLayoutCreateInfo create_info = {
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = static_cast<u32>(layout_bindings.size()),
        .pBindings = layout_bindings.data(),
    };
    return instance.GetDevice().createDescriptorSetLayoutUnique(create_info);
}
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
std::array<vk::Buffer, MaxVertexBufferCount> buffers; std::array<vk::Buffer, MaxVertexBufferCount> buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets; std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
// Bind vertex buffer.
const auto& vs_info = stages[0]; const auto& vs_info = stages[0];
const size_t num_buffers = vs_info.vs_inputs.size(); const size_t num_buffers = vs_info.vs_inputs.size();
for (u32 i = 0; i < num_buffers; ++i) { for (u32 i = 0; i < num_buffers; ++i) {
@ -210,6 +235,33 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
// Bind resource buffers and textures.
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
u32 binding{};
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const auto [vk_buffer, offset] = memory->GetVulkanBuffer(vsharp.base_address);
buffer_infos.push_back({
.buffer = vk_buffer,
.offset = offset,
.range = vsharp.stride * vsharp.num_records,
});
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &buffer_infos.back(),
});
}
}
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, set_writes);
} }
} // namespace Vulkan } // namespace Vulkan

View File

@ -54,12 +54,16 @@ public:
return *pipeline; return *pipeline;
} }
private:
vk::UniqueDescriptorSetLayout BuildSetLayout() const;
private: private:
const Instance& instance; const Instance& instance;
Scheduler& scheduler; Scheduler& scheduler;
vk::UniquePipeline pipeline; vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
std::array<Shader::Info, MaxShaderStages> stages; vk::UniqueDescriptorSetLayout desc_layout;
std::array<Shader::Info, MaxShaderStages> stages{};
PipelineKey key; PipelineKey key;
}; };

View File

@ -150,6 +150,7 @@ bool Instance::CreateDevice() {
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME); index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME);
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
const auto family_properties = physical_device.getQueueFamilyProperties(); const auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) { if (family_properties.empty()) {

View File

@ -123,7 +123,12 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
std::move(info)); std::move(info));
// Compile IR to SPIR-V // Compile IR to SPIR-V
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]); const auto profile = Shader::Profile{.supported_spirv = 0x00010600U};
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i]);
std::ofstream file("shader0.spv", std::ios::out | std::ios::binary);
file.write((const char*)spv_code.data(), spv_code.size() * 4);
file.close();
stages[i] = CompileSPV(spv_code, instance.GetDevice()); stages[i] = CompileSPV(spv_code, instance.GetDevice());
infos[i] = &programs[i].info; infos[i] = &programs[i].info;
} }

View File

@ -61,7 +61,7 @@ void Rasterizer::Draw(bool is_indexed) {
if (is_indexed) { if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else { } else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), 0, 0);
} }
cmdbuf.endRendering(); cmdbuf.endRendering();
} }
@ -85,7 +85,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
return index_size / sizeof(u16); return index_size / sizeof(u16);
} }
if (!is_indexed) { if (!is_indexed) {
return 0; return regs.num_indices;
} }
const VAddr index_address = regs.index_base_address.Address(); const VAddr index_address = regs.index_base_address.Address();