video_core: Add constant buffer support (#147)
This commit is contained in:
parent
3c90b8ac00
commit
8dfa5782b2
|
@ -189,6 +189,7 @@ public:
|
|||
} else {
|
||||
ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
|
||||
}
|
||||
std::fflush(stdout);
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -204,6 +204,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory);
|
||||
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
|
||||
LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory);
|
||||
LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection);
|
||||
LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
|
||||
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
|
||||
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
|
||||
|
|
|
@ -114,4 +114,9 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
|
|||
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "");
|
||||
}
|
||||
|
||||
/// HLE entry point: forwards a protection query for the mapping that contains
/// `addr` to the memory manager, which fills `start`, `end` and `prot`.
/// Returns whatever MemoryManager::QueryProtection returns.
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
    const auto virtual_addr = std::bit_cast<VAddr>(addr);
    return Core::Memory::Instance()->QueryProtection(virtual_addr, start, end, prot);
}
|
||||
|
||||
} // namespace Libraries::Kernel
|
||||
|
|
|
@ -39,5 +39,6 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t l
|
|||
int flags, const char* name);
|
||||
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
|
||||
int flags);
|
||||
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
|
||||
|
||||
} // namespace Libraries::Kernel
|
||||
|
|
|
@ -123,6 +123,17 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
|
|||
impl.Unmap(virtual_addr, size);
|
||||
}
|
||||
|
||||
/// Reports the bounds and protection flags of the virtual memory area containing `addr`.
///
/// @param addr  Guest virtual address to look up.
/// @param start Optional out-parameter receiving the base address of the area.
/// @param end   Optional out-parameter receiving the address one past the end of the area.
/// @param prot  Optional out-parameter receiving the protection flags of the area.
/// @return SCE_OK. Asserts if the area containing `addr` is free (unmapped).
///
/// Each output pointer is written only when it is non-null, so a guest that is
/// interested in a subset of the results cannot crash the emulator.
int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) {
    const auto it = FindVMA(addr);
    const auto& vma = it->second;
    ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped");

    if (start != nullptr) {
        *start = reinterpret_cast<void*>(vma.base);
    }
    if (end != nullptr) {
        *end = reinterpret_cast<void*>(vma.base + vma.size);
    }
    if (prot != nullptr) {
        *prot = static_cast<u32>(vma.prot);
    }
    return SCE_OK;
}
|
||||
|
||||
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
|
||||
auto it = mapped_memories.upper_bound(addr);
|
||||
it = std::prev(it);
|
||||
|
@ -243,7 +254,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
|
|||
constexpr vk::BufferUsageFlags MapFlags =
|
||||
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
|
||||
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
|
||||
vk::BufferUsageFlagBits::eUniformBuffer;
|
||||
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
|
||||
|
||||
const vk::StructureChain buffer_info = {
|
||||
vk::BufferCreateInfo{
|
||||
|
|
|
@ -107,6 +107,8 @@ public:
|
|||
|
||||
void UnmapMemory(VAddr virtual_addr, size_t size);
|
||||
|
||||
int QueryProtection(VAddr addr, void** start, void** end, u32* prot);
|
||||
|
||||
std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
|
||||
|
||||
private:
|
||||
|
|
|
@ -29,6 +29,10 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// SPIR-V emitter for the GetUserData opcode.
/// No code is generated for it: reaching this emitter always throws LogicError.
void EmitGetUserData(EmitContext&) {
    throw LogicError("Unreachable instruction");
}
|
||||
|
||||
/// SPIR-V emitter for the GetScalarRegister opcode.
/// No code is generated for it: reaching this emitter always throws LogicError.
void EmitGetScalarRegister(EmitContext&) {
    throw LogicError("Unreachable instruction");
}
|
||||
|
@ -95,4 +99,38 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
|||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
/// Single-dword buffer load. Not implemented: every call hits UNREACHABLE().
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
                     const IR::Value& address) {
    UNREACHABLE();
}
|
||||
|
||||
/// Two-dword buffer load. Not implemented: every call hits UNREACHABLE().
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
                       const IR::Value& address) {
    UNREACHABLE();
}
|
||||
|
||||
/// Three-dword buffer load. Not implemented: every call hits UNREACHABLE().
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
                       const IR::Value& address) {
    UNREACHABLE();
}
|
||||
|
||||
/// Emits a load of four consecutive 32-bit floats from a bound buffer.
///
/// @param ctx     Emit context owning the buffer variables and pointer types.
/// @param inst    IR instruction; its flags are read as IR::BufferInstInfo.
/// @param handle  Immediate index into ctx.buffers / ctx.info.buffers.
/// @param address Index of the first element to load.
/// @return Id of an F32x4 composite built from the four loaded scalars.
///
/// Only the "index enabled, offset disabled" addressing mode is handled; every
/// other combination hits UNREACHABLE().
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
                       const IR::Value& address) {
    const auto flags = inst->Flags<IR::BufferInstInfo>();
    const Id buffer_var = ctx.buffers[handle.U32()];
    const Id element_ptr_type =
        ctx.info.buffers[handle.U32()].is_storage ? ctx.storage_f32 : ctx.uniform_f32;

    // Anything other than index-only addressing is not supported yet.
    if (!flags.index_enable || flags.offset_enable) {
        UNREACHABLE();
    }

    boost::container::static_vector<Id, 4> components;
    for (u32 comp = 0; comp < 4; ++comp) {
        const Id element{ctx.OpIAdd(ctx.U32[1], ctx.Def(address), ctx.ConstU32(comp))};
        // Member 0 of the block struct is the data array.
        const Id pointer{ctx.OpAccessChain(element_ptr_type, buffer_var, ctx.ConstU32(0U), element)};
        components.push_back(ctx.OpLoad(ctx.F32[1], pointer));
    }
    return ctx.OpCompositeConstruct(ctx.F32[4], components);
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -34,6 +34,7 @@ void EmitGetVcc(EmitContext& ctx);
|
|||
void EmitSetVcc(EmitContext& ctx);
|
||||
void EmitPrologue(EmitContext& ctx);
|
||||
void EmitEpilogue(EmitContext& ctx);
|
||||
void EmitGetUserData(EmitContext& ctx);
|
||||
void EmitGetScalarRegister(EmitContext& ctx);
|
||||
void EmitSetScalarRegister(EmitContext& ctx);
|
||||
void EmitGetVectorRegister(EmitContext& ctx);
|
||||
|
@ -46,6 +47,14 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu
|
|||
const IR::Value& offset);
|
||||
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
|
||||
const IR::Value& offset);
|
||||
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
||||
const IR::Value& address);
|
||||
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
||||
const IR::Value& address);
|
||||
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
||||
const IR::Value& address);
|
||||
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
||||
const IR::Value& address);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||
|
|
|
@ -36,7 +36,8 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
|||
} // Anonymous namespace
|
||||
|
||||
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings)
|
||||
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} {
|
||||
: Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_},
|
||||
stage{program.info.stage} {
|
||||
u32& uniform_binding{bindings.unified};
|
||||
u32& storage_binding{bindings.unified};
|
||||
u32& texture_binding{bindings.unified};
|
||||
|
@ -44,6 +45,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings
|
|||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces(program);
|
||||
DefineBuffers(program.info);
|
||||
}
|
||||
|
||||
EmitContext::~EmitContext() = default;
|
||||
|
@ -107,8 +109,8 @@ void EmitContext::DefineArithmeticTypes() {
|
|||
}
|
||||
|
||||
void EmitContext::DefineInterfaces(const IR::Program& program) {
|
||||
DefineInputs(program);
|
||||
DefineOutputs(program);
|
||||
DefineInputs(program.info);
|
||||
DefineOutputs(program.info);
|
||||
}
|
||||
|
||||
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||
|
@ -164,8 +166,7 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
const auto& info = program.info;
|
||||
void EmitContext::DefineInputs(const Info& info) {
|
||||
switch (stage) {
|
||||
case Stage::Vertex:
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
|
@ -201,8 +202,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineOutputs(const IR::Program& program) {
|
||||
const auto& info = program.info;
|
||||
void EmitContext::DefineOutputs(const Info& info) {
|
||||
switch (stage) {
|
||||
case Stage::Vertex:
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
|
@ -234,4 +234,45 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineBuffers(const Info& info) {
|
||||
const auto define_buffer = [&](const BufferResource& buffer, Id type, u32 element_size,
|
||||
char type_char, u32 index) {
|
||||
ASSERT(buffer.stride % element_size == 0);
|
||||
const u32 num_elements = buffer.stride * buffer.num_records / element_size;
|
||||
const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))};
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, element_size);
|
||||
|
||||
const Id struct_type{TypeStruct(record_array_type)};
|
||||
const auto name =
|
||||
fmt::format("{}_cbuf_block_{}{}", stage, type_char, element_size * CHAR_BIT);
|
||||
Name(struct_type, name);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
|
||||
const auto storage_class =
|
||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
||||
if (buffer.is_storage) {
|
||||
storage_f32 = TypePointer(storage_class, type);
|
||||
} else {
|
||||
uniform_f32 = TypePointer(storage_class, type);
|
||||
}
|
||||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, fmt::format("c{}", index));
|
||||
|
||||
binding++;
|
||||
buffers.push_back(id);
|
||||
interfaces.push_back(id);
|
||||
};
|
||||
|
||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
ASSERT(True(buffer.used_types & IR::Type::F32));
|
||||
define_buffer(buffer, F32[1], 4, 'f', i);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -114,6 +114,7 @@ public:
|
|||
return ConstantComposite(type, constituents);
|
||||
}
|
||||
|
||||
Info& info;
|
||||
const Profile& profile;
|
||||
Stage stage{};
|
||||
|
||||
|
@ -141,6 +142,9 @@ public:
|
|||
Id output_u32{};
|
||||
Id output_f32{};
|
||||
|
||||
Id uniform_f32{};
|
||||
Id storage_f32{};
|
||||
|
||||
boost::container::small_vector<Id, 16> interfaces;
|
||||
|
||||
Id output_position{};
|
||||
|
@ -148,6 +152,9 @@ public:
|
|||
Id base_vertex{};
|
||||
std::array<Id, 8> frag_color{};
|
||||
|
||||
u32 binding{};
|
||||
boost::container::small_vector<Id, 4> buffers;
|
||||
|
||||
struct SpirvAttribute {
|
||||
Id id;
|
||||
Id pointer_type;
|
||||
|
@ -160,8 +167,9 @@ public:
|
|||
private:
|
||||
void DefineArithmeticTypes();
|
||||
void DefineInterfaces(const IR::Program& program);
|
||||
void DefineInputs(const IR::Program& program);
|
||||
void DefineOutputs(const IR::Program& program);
|
||||
void DefineInputs(const Info& info);
|
||||
void DefineOutputs(const Info& info);
|
||||
void DefineBuffers(const Info& info);
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||
};
|
||||
|
|
|
@ -36,7 +36,7 @@ Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*bloc
|
|||
// Initialize user data.
|
||||
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.Imm32(0U));
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetUserData(dst_sreg));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -171,6 +171,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CNDMASK_B32:
|
||||
translator.V_CNDMASK_B32(inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
|
||||
translator.TBUFFER_LOAD_FORMAT_XYZW(inst);
|
||||
break;
|
||||
case Opcode::S_MOV_B64:
|
||||
case Opcode::S_WQM_B64:
|
||||
case Opcode::V_INTERP_P1_F32:
|
||||
|
|
|
@ -48,6 +48,9 @@ public:
|
|||
void V_CMP_EQ_U32(const GcnInst& inst);
|
||||
void V_CNDMASK_B32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
|
||||
|
|
|
@ -100,4 +100,35 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
if (mtbuf.idxen && mtbuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
if (mtbuf.idxen || mtbuf.offen) {
|
||||
return ir.GetVectorReg(vaddr);
|
||||
}
|
||||
return {};
|
||||
}();
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
|
||||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
info.is_typed.Assign(1);
|
||||
|
||||
const IR::Value value = ir.LoadBuffer(4, ir.GetScalarReg(sharp), address, info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -111,6 +111,10 @@ void IREmitter::Epilogue() {
|
|||
Inst(Opcode::Epilogue);
|
||||
}
|
||||
|
||||
/// Emits a GetUserData instruction for `reg` and returns its U32 result.
U32 IREmitter::GetUserData(IR::ScalarReg reg) {
    return Inst<U32>(Opcode::GetUserData, reg);
}
|
||||
|
||||
template <>
|
||||
U32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
|
||||
return Inst<U32>(Opcode::GetScalarRegister, reg);
|
||||
|
@ -233,6 +237,22 @@ F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32&
|
|||
return Inst<F32>(Opcode::ReadConstBufferF32, handle, index, offset);
|
||||
}
|
||||
|
||||
/// Emits a buffer load of `num_dwords` 32-bit floats.
///
/// @param num_dwords Number of dwords to load; must be in [1, 4].
/// @param handle     Buffer handle operand.
/// @param address    Address operand.
/// @param info       Addressing/format flags attached to the instruction.
/// @return The value produced by the selected LoadBufferF32[xN] opcode.
/// @throws InvalidArgument when `num_dwords` is outside [1, 4].
Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
                            BufferInstInfo info) {
    if (num_dwords < 1 || num_dwords > 4) {
        throw InvalidArgument("Invalid number of dwords {}", num_dwords);
    }
    // Indexed by (num_dwords - 1).
    constexpr Opcode load_opcodes[] = {
        Opcode::LoadBufferF32,
        Opcode::LoadBufferF32x2,
        Opcode::LoadBufferF32x3,
        Opcode::LoadBufferF32x4,
    };
    return Inst(load_opcodes[num_dwords - 1], Flags{info}, handle, address);
}
|
||||
|
||||
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
|
|
|
@ -42,6 +42,8 @@ public:
|
|||
void Prologue();
|
||||
void Epilogue();
|
||||
|
||||
U32 GetUserData(IR::ScalarReg reg);
|
||||
|
||||
template <typename T = U32>
|
||||
[[nodiscard]] T GetScalarReg(IR::ScalarReg reg);
|
||||
template <typename T = U32>
|
||||
|
@ -69,6 +71,9 @@ public:
|
|||
template <typename T = U32>
|
||||
[[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset);
|
||||
|
||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U1 GetZeroFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetSignFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetCarryFromOp(const Value& op);
|
||||
|
|
|
@ -19,6 +19,7 @@ OPCODE(ReadConstBuffer, U32, Opaq
|
|||
OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetUserData, U32, ScalarReg, )
|
||||
OPCODE(GetScalarRegister, U32, ScalarReg, )
|
||||
OPCODE(SetScalarRegister, Void, ScalarReg, U32, )
|
||||
OPCODE(GetVectorRegister, U32, VectorReg, )
|
||||
|
@ -42,6 +43,12 @@ OPCODE(UndefU16, U16,
|
|||
OPCODE(UndefU32, U32, )
|
||||
OPCODE(UndefU64, U64, )
|
||||
|
||||
// Buffer operations
|
||||
OPCODE(LoadBufferF32, F32, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
|
||||
|
|
|
@ -4,28 +4,51 @@
|
|||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <optional>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
|
||||
struct SharpLocation {
|
||||
IR::ScalarReg eud_ptr;
|
||||
u32 index_dwords;
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
|
||||
auto operator<=>(const SharpLocation&) const = default;
|
||||
};
|
||||
|
||||
bool IsResourceInstruction(const IR::Inst& inst) {
|
||||
/// Returns true when `inst` is one of the buffer access opcodes
/// (LoadBufferF32[x2|x3|x4], ReadConstBuffer, ReadConstBufferF32).
bool IsBufferInstruction(const IR::Inst& inst) {
    const IR::Opcode opcode = inst.GetOpcode();
    return opcode == IR::Opcode::LoadBufferF32 || opcode == IR::Opcode::LoadBufferF32x2 ||
           opcode == IR::Opcode::LoadBufferF32x3 || opcode == IR::Opcode::LoadBufferF32x4 ||
           opcode == IR::Opcode::ReadConstBuffer || opcode == IR::Opcode::ReadConstBufferF32;
}
|
||||
|
||||
/// Returns the element type accessed by a buffer load instruction.
/// Only the LoadBufferF32[x2|x3|x4] opcodes are recognised (all yield F32);
/// any other opcode hits UNREACHABLE().
IR::Type BufferLoadType(const IR::Inst& inst) {
    const IR::Opcode opcode = inst.GetOpcode();
    const bool is_f32_load =
        opcode == IR::Opcode::LoadBufferF32 || opcode == IR::Opcode::LoadBufferF32x2 ||
        opcode == IR::Opcode::LoadBufferF32x3 || opcode == IR::Opcode::LoadBufferF32x4;
    if (!is_f32_load) {
        UNREACHABLE();
    }
    return IR::Type::F32;
}
|
||||
|
||||
bool IsImageInstruction(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageSampleExplicitLod:
|
||||
case IR::Opcode::ImageSampleImplicitLod:
|
||||
case IR::Opcode::ImageSampleDrefExplicitLod:
|
||||
|
@ -44,32 +67,26 @@ bool IsResourceInstruction(const IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
/*class Descriptors {
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(TextureDescriptors& texture_descriptors_)
|
||||
: texture_descriptors{texture_descriptors_} {}
|
||||
explicit Descriptors(BufferResourceList& buffer_resources_)
|
||||
: buffer_resources{buffer_resources_} {}
|
||||
|
||||
u32 Add(const TextureDescriptor& desc) {
|
||||
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
|
||||
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
|
||||
desc.has_secondary == existing.has_secondary &&
|
||||
desc.cbuf_index == existing.cbuf_index &&
|
||||
desc.cbuf_offset == existing.cbuf_offset &&
|
||||
desc.shift_left == existing.shift_left &&
|
||||
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
|
||||
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
|
||||
desc.secondary_shift_left == existing.secondary_shift_left &&
|
||||
desc.count == existing.count && desc.size_shift == existing.size_shift;
|
||||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
})};
|
||||
// TODO: Read this from TIC
|
||||
texture_descriptors[index].is_multisample |= desc.is_multisample;
|
||||
auto& buffer = buffer_resources[index];
|
||||
ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
|
||||
buffer.is_storage |= desc.is_storage;
|
||||
buffer.used_types |= desc.used_types;
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Descriptors, typename Descriptor, typename Func>
|
||||
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
|
||||
// TODO: Handle arrays
|
||||
const auto it{std::ranges::find_if(descriptors, pred)};
|
||||
if (it != descriptors.end()) {
|
||||
return static_cast<u32>(std::distance(descriptors.begin(), it));
|
||||
|
@ -78,17 +95,16 @@ private:
|
|||
return static_cast<u32>(descriptors.size()) - 1;
|
||||
}
|
||||
|
||||
TextureDescriptors& texture_descriptors;
|
||||
};*/
|
||||
BufferResourceList& buffer_resources;
|
||||
};
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
SharpLocation TrackSharp(const IR::Value& handle) {
|
||||
IR::Inst* inst = handle.InstRecursive();
|
||||
if (inst->GetOpcode() == IR::Opcode::GetScalarRegister) {
|
||||
SharpLocation TrackSharp(const IR::Inst* inst) {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return SharpLocation{
|
||||
.eud_ptr = IR::ScalarReg::Max,
|
||||
.index_dwords = inst->Arg(0).U32(),
|
||||
.sgpr_base = u32(IR::ScalarReg::Max),
|
||||
.dword_offset = u32(inst->Arg(0).ScalarReg()),
|
||||
};
|
||||
}
|
||||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
|
||||
|
@ -108,21 +124,55 @@ SharpLocation TrackSharp(const IR::Value& handle) {
|
|||
|
||||
// Return retrieved location.
|
||||
return SharpLocation{
|
||||
.eud_ptr = base,
|
||||
.index_dwords = dword_offset,
|
||||
.sgpr_base = u32(base),
|
||||
.dword_offset = dword_offset,
|
||||
};
|
||||
}
|
||||
|
||||
/// Rewrites a buffer instruction so it refers to a tracked buffer binding.
///
/// The sharp feeding argument 0 is located and read from user data, registered in
/// `descriptors`, and argument 0 is replaced with the resulting binding index.
/// Argument 1 is replaced with the dword address computed from the instruction
/// offset and, when indexing is enabled, the record index times the buffer stride.
///
/// @param block       Block containing `inst`; new IR is inserted before `inst`.
/// @param inst        Buffer instruction to patch in place.
/// @param info        Shader info providing access to user data.
/// @param descriptors Collector that assigns binding indices to buffer resources.
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                            Descriptors& descriptors) {
    // Find where the sharp lives and fetch it.
    IR::Inst* const handle_producer = inst.Arg(0).InstRecursive();
    const auto location = TrackSharp(handle_producer);
    const auto vsharp = info.ReadUd<AmdGpu::Buffer>(location.sgpr_base, location.dword_offset);

    const u32 binding_index = descriptors.Add(BufferResource{
        .sgpr_base = location.sgpr_base,
        .dword_offset = location.dword_offset,
        .stride = u32(vsharp.stride),
        .num_records = u32(vsharp.num_records),
        .used_types = BufferLoadType(inst),
        // Buffers whose base address is not 64-byte aligned are bound as storage buffers.
        .is_storage = vsharp.base_address % 64 != 0,
    });

    const auto flags = inst.Flags<IR::BufferInstInfo>();
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};

    // Replace handle with binding index in buffer resource list.
    inst.SetArg(0, ir.Imm32(binding_index));
    ASSERT(!vsharp.swizzle_enable && !vsharp.add_tid_enable);
    if (flags.is_typed) {
        ASSERT(flags.nfmt == AmdGpu::NumberFormat::Float &&
               flags.dmft == AmdGpu::DataFormat::Format32_32_32_32);
    }

    // Calculate buffer address, expressed in dwords.
    const u32 stride_in_dwords = vsharp.stride / sizeof(u32);
    const u32 offset_in_dwords = flags.inst_offset.Value() / sizeof(u32);
    IR::U32 address = ir.Imm32(offset_in_dwords);
    if (flags.index_enable) {
        if (flags.offset_enable) {
            // Combined index + offset addressing is not supported.
            UNREACHABLE();
        }
        const IR::U32 record_index{inst.Arg(1)};
        address = ir.IAdd(ir.IMul(record_index, ir.Imm32(stride_in_dwords)), address);
    }
    inst.SetArg(1, address);
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
auto& info = program.info;
|
||||
Descriptors descriptors{info.buffers};
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (!IsResourceInstruction(inst)) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
const auto loc = TrackSharp(producer->Arg(0));
|
||||
fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,
|
||||
loc.index_dwords + 4, loc.eud_ptr != IR::ScalarReg::Max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -41,6 +42,16 @@ union TextureInstInfo {
|
|||
BitField<25, 2, u32> num_derivatives;
|
||||
};
|
||||
|
||||
union BufferInstInfo {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> index_enable;
|
||||
BitField<1, 1, u32> offset_enable;
|
||||
BitField<2, 12, u32> inst_offset;
|
||||
BitField<14, 4, AmdGpu::DataFormat> dmft;
|
||||
BitField<18, 3, AmdGpu::NumberFormat> nfmt;
|
||||
BitField<21, 1, u32> is_typed;
|
||||
};
|
||||
|
||||
enum class ScalarReg : u32 {
|
||||
S0,
|
||||
S1,
|
||||
|
|
|
@ -62,15 +62,16 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
|
||||
// Run optimization passes
|
||||
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
||||
Shader::Optimization::ResourceTrackingPass(program);
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
Shader::Optimization::ResourceTrackingPass(program);
|
||||
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
|
||||
Shader::Optimization::CollectShaderInfoPass(program);
|
||||
|
||||
for (const auto& block : program.blocks) {
|
||||
fmt::print("{}\n", IR::DumpBlock(*block));
|
||||
}
|
||||
std::fflush(stdout);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/type.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace Shader {
|
||||
|
@ -39,6 +41,18 @@ enum class TextureType : u32 {
|
|||
};
|
||||
constexpr u32 NUM_TEXTURE_TYPES = 7;
|
||||
|
||||
struct BufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
u32 stride;
|
||||
u32 num_records;
|
||||
IR::Type used_types;
|
||||
bool is_storage;
|
||||
|
||||
auto operator<=>(const BufferResource&) const = default;
|
||||
};
|
||||
using BufferResourceList = boost::container::static_vector<BufferResource, 8>;
|
||||
|
||||
struct Info {
|
||||
struct VsInput {
|
||||
AmdGpu::NumberFormat fmt;
|
||||
|
@ -86,17 +100,31 @@ struct Info {
|
|||
AttributeFlags loads{};
|
||||
AttributeFlags stores{};
|
||||
|
||||
BufferResourceList buffers;
|
||||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
|
||||
template <typename T>
|
||||
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
|
||||
T data;
|
||||
u32* base;
|
||||
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
|
||||
const u32* base = user_data.data();
|
||||
if (ptr_index != IR::NumScalarRegs) {
|
||||
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
|
||||
}
|
||||
std::memcpy(&data, base + dword_offset, sizeof(T));
|
||||
return data;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Shader::Stage> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(const Shader::Stage& stage, format_context& ctx) const {
|
||||
constexpr static std::array names = {"vs", "tc", "te", "gs", "fs", "cs"};
|
||||
return fmt::format_to(ctx.out(), "{}", names[static_cast<size_t>(stage)]);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -27,6 +27,7 @@ struct Buffer {
|
|||
BitField<15, 4, DataFormat> data_format;
|
||||
BitField<19, 2, u32> element_size;
|
||||
BitField<21, 2, u32> index_stride;
|
||||
BitField<23, 1, u32> add_tid_enable;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
|
@ -25,9 +26,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
stages[i] = *infos[i];
|
||||
}
|
||||
|
||||
desc_layout = BuildSetLayout();
|
||||
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = 0U,
|
||||
.pSetLayouts = nullptr,
|
||||
.setLayoutCount = 1U,
|
||||
.pSetLayouts = &set_layout,
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = nullptr,
|
||||
};
|
||||
|
@ -196,10 +199,32 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
|
||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||
|
||||
/// Builds the push-descriptor set layout for this pipeline.
///
/// One storage-buffer binding is created for every buffer of every stage, with
/// binding numbers assigned sequentially across stages in iteration order. Each
/// binding is visible to both the vertex and fragment stages.
vk::UniqueDescriptorSetLayout GraphicsPipeline::BuildSetLayout() const {
    boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> layout_bindings;
    u32 next_binding{};
    for (const auto& stage_info : stages) {
        const size_t num_stage_buffers = stage_info.buffers.size();
        for (size_t n = 0; n < num_stage_buffers; ++n) {
            layout_bindings.push_back({
                .binding = next_binding++,
                .descriptorType = vk::DescriptorType::eStorageBuffer,
                .descriptorCount = 1,
                .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
            });
        }
    }

    const vk::DescriptorSetLayoutCreateInfo layout_ci = {
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = static_cast<u32>(layout_bindings.size()),
        .pBindings = layout_bindings.data(),
    };
    return instance.GetDevice().createDescriptorSetLayoutUnique(layout_ci);
}
|
||||
|
||||
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
|
||||
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
|
||||
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
|
||||
|
||||
// Bind vertex buffer.
|
||||
const auto& vs_info = stages[0];
|
||||
const size_t num_buffers = vs_info.vs_inputs.size();
|
||||
for (u32 i = 0; i < num_buffers; ++i) {
|
||||
|
@ -210,6 +235,33 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
|
|||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
|
||||
|
||||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
u32 binding{};
|
||||
|
||||
for (const auto& stage : stages) {
|
||||
for (const auto& buffer : stage.buffers) {
|
||||
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||
const auto [vk_buffer, offset] = memory->GetVulkanBuffer(vsharp.base_address);
|
||||
buffer_infos.push_back({
|
||||
.buffer = vk_buffer,
|
||||
.offset = offset,
|
||||
.range = vsharp.stride * vsharp.num_records,
|
||||
});
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, set_writes);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -54,12 +54,16 @@ public:
|
|||
return *pipeline;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::UniqueDescriptorSetLayout BuildSetLayout() const;
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
vk::UniquePipeline pipeline;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
std::array<Shader::Info, MaxShaderStages> stages;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
std::array<Shader::Info, MaxShaderStages> stages{};
|
||||
PipelineKey key;
|
||||
};
|
||||
|
||||
|
|
|
@ -150,6 +150,7 @@ bool Instance::CreateDevice() {
|
|||
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
|
||||
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
||||
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||
|
||||
const auto family_properties = physical_device.getQueueFamilyProperties();
|
||||
if (family_properties.empty()) {
|
||||
|
|
|
@ -123,7 +123,12 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
|
|||
std::move(info));
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
|
||||
const auto profile = Shader::Profile{.supported_spirv = 0x00010600U};
|
||||
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i]);
|
||||
std::ofstream file("shader0.spv", std::ios::out | std::ios::binary);
|
||||
file.write((const char*)spv_code.data(), spv_code.size() * 4);
|
||||
file.close();
|
||||
|
||||
stages[i] = CompileSPV(spv_code, instance.GetDevice());
|
||||
infos[i] = &programs[i].info;
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ void Rasterizer::Draw(bool is_indexed) {
|
|||
if (is_indexed) {
|
||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||
} else {
|
||||
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
|
||||
cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), 0, 0);
|
||||
}
|
||||
cmdbuf.endRendering();
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
|
|||
return index_size / sizeof(u16);
|
||||
}
|
||||
if (!is_indexed) {
|
||||
return 0;
|
||||
return regs.num_indices;
|
||||
}
|
||||
|
||||
const VAddr index_address = regs.index_base_address.Address();
|
||||
|
|
Loading…
Reference in New Issue