shader_recompiler: Inline constant buffer impl

This commit is contained in:
IndecisiveTurtle 2024-07-03 05:43:36 +03:00
parent f212f43e18
commit 63801cfa35
20 changed files with 189 additions and 42 deletions

View File

@ -288,6 +288,10 @@ void EmitGetVcc(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction"); UNREACHABLE_MSG("Unreachable instruction");
} }
// SPIR-V backend entry for the GetSccLo IR opcode. It emits nothing and only
// reports "Unreachable instruction" through UNREACHABLE_MSG.
// NOTE(review): presumably every GetSccLo is replaced by an earlier IR pass before
// emission, so this should never run — confirm.
void EmitGetSccLo(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitGetVccLo(EmitContext& ctx) { void EmitGetVccLo(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction"); UNREACHABLE_MSG("Unreachable instruction");
} }
@ -308,6 +312,10 @@ void EmitSetVcc(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction"); UNREACHABLE_MSG("Unreachable instruction");
} }
// SPIR-V backend entry for the SetSccLo IR opcode. It emits nothing and only
// reports "Unreachable instruction" through UNREACHABLE_MSG.
// NOTE(review): presumably every SetSccLo is consumed by an earlier IR pass before
// emission, so this should never run — confirm.
void EmitSetSccLo(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}
void EmitSetVccLo(EmitContext& ctx) { void EmitSetVccLo(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction"); UNREACHABLE_MSG("Unreachable instruction");
} }

View File

@ -33,11 +33,13 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx); void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx); void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx); void EmitGetVcc(EmitContext& ctx);
void EmitGetSccLo(EmitContext& ctx);
void EmitGetVccLo(EmitContext& ctx); void EmitGetVccLo(EmitContext& ctx);
void EmitGetVccHi(EmitContext& ctx); void EmitGetVccHi(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx); void EmitSetScc(EmitContext& ctx);
void EmitSetExec(EmitContext& ctx); void EmitSetExec(EmitContext& ctx);
void EmitSetVcc(EmitContext& ctx); void EmitSetVcc(EmitContext& ctx);
void EmitSetSccLo(EmitContext& ctx);
void EmitSetVccLo(EmitContext& ctx); void EmitSetVccLo(EmitContext& ctx);
void EmitSetVccHi(EmitContext& ctx); void EmitSetVccHi(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx); void EmitPrologue(EmitContext& ctx);
@ -245,6 +247,7 @@ Id EmitFPIsInf32(EmitContext& ctx, Id value);
Id EmitFPIsInf64(EmitContext& ctx, Id value); Id EmitFPIsInf64(EmitContext& ctx, Id value);
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b);
Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b);
Id EmitSMulExt(EmitContext& ctx, Id a, Id b); Id EmitSMulExt(EmitContext& ctx, Id a, Id b);

View File

@ -60,6 +60,10 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
return ctx.OpIAdd(ctx.U64, a, b); return ctx.OpIAdd(ctx.U64, a, b);
} }
// Emits an OpIAddCarry on operands `a` and `b` and returns the resulting id.
// The result type handed to the op is ctx.full_result_u32x2.
// NOTE(review): presumably that is the two-member {sum, carry} struct type that
// OpIAddCarry requires — confirm against EmitContext.
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b) {
return ctx.OpIAddCarry(ctx.full_result_u32x2, a, b);
}
Id EmitISub32(EmitContext& ctx, Id a, Id b) { Id EmitISub32(EmitContext& ctx, Id a, Id b) {
return ctx.OpISub(ctx.U32[1], a, b); return ctx.OpISub(ctx.U32[1], a, b);
} }

View File

@ -633,7 +633,7 @@ private:
if (!stmt.block->is_dummy) { if (!stmt.block->is_dummy) {
const u32 start = stmt.block->begin_index; const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1; const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, inst_list.subspan(start, size), info); Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), info);
} }
break; break;
} }

View File

@ -318,4 +318,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
ir.SetScc(ir.Imm1(false)); ir.SetScc(ir.Imm1(false));
} }
// Translates S_GETPC_B64 by storing the caller-supplied `pc` as a 32-bit
// immediate into the instruction's first destination operand.
// NOTE(review): only inst.dst[0] is written; the second dword of the 64-bit
// destination is left untouched here — confirm that is intentional.
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
// This only really exists to let resource tracking pass know
// there is an inline cbuf.
SetDst(inst.dst[0], ir.Imm32(pc));
}
// Translates S_ADDC_U32 as dst[0] = src[0] + src[1] + GetSccLo().
// NOTE(review): no SCC value is written back here, so no carry-out is produced.
// NOTE(review): the carry-in is read through GetSccLo (a U32 value), not through
// the U1 GetScc flag — confirm something writes SccLo before this read.
void Translator::S_ADDC_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -30,7 +30,8 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
} }
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2)); return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}(); }();
const IR::Value vsharp = ir.GetScalarReg(sbase); const IR::Value vsharp = ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
IR::ScalarReg dst_reg{inst.dst[0].code}; IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) { for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i)); const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));

View File

@ -236,7 +236,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
} }
} }
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) { void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) { if (inst_list.empty()) {
return; return;
} }
@ -833,6 +833,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_ADD_U32: case Opcode::S_ADD_U32:
translator.S_ADD_U32(inst); translator.S_ADD_U32(inst);
break; break;
case Opcode::S_ADDC_U32:
translator.S_ADDC_U32(inst);
break;
case Opcode::S_SUB_U32: case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32: case Opcode::S_SUB_I32:
translator.S_SUB_U32(inst); translator.S_SUB_U32(inst);
@ -878,6 +881,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_READFIRSTLANE_B32: case Opcode::V_READFIRSTLANE_B32:
translator.V_READFIRSTLANE_B32(inst); translator.V_READFIRSTLANE_B32(inst);
break; break;
case Opcode::S_GETPC_B64:
translator.S_GETPC_B64(block_base, inst);
break;
case Opcode::S_NOP: case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0: case Opcode::S_CBRANCH_SCC0:
@ -895,6 +901,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
magic_enum::enum_name(inst.opcode), opcode); magic_enum::enum_name(inst.opcode), opcode);
info.translation_failed = true; info.translation_failed = true;
} }
block_base += inst.length;
} }
} }

View File

@ -80,6 +80,8 @@ public:
void S_BREV_B32(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst);
void S_ADD_U32(const GcnInst& inst); void S_ADD_U32(const GcnInst& inst);
void S_SUB_U32(const GcnInst& inst); void S_SUB_U32(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_ADDC_U32(const GcnInst& inst);
// Scalar Memory // Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -192,6 +194,6 @@ private:
static std::array<bool, IR::NumScalarRegs> exec_contexts; static std::array<bool, IR::NumScalarRegs> exec_contexts;
}; };
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info); void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -250,7 +250,9 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt)); info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
} }
const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info); const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code}; const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) { if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::F32{value}); ir.SetVectorReg(dst_reg, IR::F32{value});
@ -309,7 +311,9 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3)); ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
break; break;
} }
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info); const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBuffer(num_dwords, handle, address, value, info);
} }
void Translator::IMAGE_GET_LOD(const GcnInst& inst) { void Translator::IMAGE_GET_LOD(const GcnInst& inst) {

View File

@ -212,6 +212,10 @@ U1 IREmitter::GetVcc() {
return Inst<U1>(Opcode::GetVcc); return Inst<U1>(Opcode::GetVcc);
} }
// Emits a GetSccLo IR instruction (no arguments) and returns its result as a U32.
U32 IREmitter::GetSccLo() {
return Inst<U32>(Opcode::GetSccLo);
}
U32 IREmitter::GetVccLo() { U32 IREmitter::GetVccLo() {
return Inst<U32>(Opcode::GetVccLo); return Inst<U32>(Opcode::GetVccLo);
} }
@ -232,6 +236,10 @@ void IREmitter::SetVcc(const U1& value) {
Inst(Opcode::SetVcc, value); Inst(Opcode::SetVcc, value);
} }
// Emits a SetSccLo IR instruction with `value` as its only argument.
void IREmitter::SetSccLo(const U32& value) {
Inst(Opcode::SetSccLo, value);
}
void IREmitter::SetVccLo(const U32& value) { void IREmitter::SetVccLo(const U32& value) {
Inst(Opcode::SetVccLo, value); Inst(Opcode::SetVccLo, value);
} }
@ -898,6 +906,18 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
} }
} }
// Emits an IAddCary32 instruction for `a` and `b` and returns the instruction's
// value. Both operands must report the same type, and only Type::U32 is accepted;
// any other type is rejected through ThrowInvalidType.
Value IREmitter::IAddCary(const U32& a, const U32& b) {
    if (a.Type() != b.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
    }
    switch (a.Type()) {
    case Type::U32:
        // IAddCary32 is declared with a U32x2 result in the opcode table, so the
        // instruction must not be wrapped as a scalar U32. Hand it back as an
        // untyped Value, which is also what this function is declared to return.
        return Inst(Opcode::IAddCary32, a, b);
    default:
        ThrowInvalidType(a.Type());
    }
}
U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
if (a.Type() != b.Type()) { if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());

View File

@ -64,11 +64,13 @@ public:
[[nodiscard]] U1 GetScc(); [[nodiscard]] U1 GetScc();
[[nodiscard]] U1 GetExec(); [[nodiscard]] U1 GetExec();
[[nodiscard]] U1 GetVcc(); [[nodiscard]] U1 GetVcc();
[[nodiscard]] U32 GetSccLo();
[[nodiscard]] U32 GetVccLo(); [[nodiscard]] U32 GetVccLo();
[[nodiscard]] U32 GetVccHi(); [[nodiscard]] U32 GetVccHi();
void SetScc(const U1& value); void SetScc(const U1& value);
void SetExec(const U1& value); void SetExec(const U1& value);
void SetVcc(const U1& value); void SetVcc(const U1& value);
void SetSccLo(const U32& value);
void SetVccLo(const U32& value); void SetVccLo(const U32& value);
void SetVccHi(const U32& value); void SetVccHi(const U32& value);
@ -151,8 +153,9 @@ public:
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs); [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
[[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32U64 INeg(const U32U64& value); [[nodiscard]] U32U64 INeg(const U32U64& value);

View File

@ -56,11 +56,13 @@ OPCODE(SetAttribute, Void, Attr
OPCODE(GetScc, U1, Void, ) OPCODE(GetScc, U1, Void, )
OPCODE(GetExec, U1, Void, ) OPCODE(GetExec, U1, Void, )
OPCODE(GetVcc, U1, Void, ) OPCODE(GetVcc, U1, Void, )
OPCODE(GetSccLo, U32, Void, )
OPCODE(GetVccLo, U32, Void, ) OPCODE(GetVccLo, U32, Void, )
OPCODE(GetVccHi, U32, Void, ) OPCODE(GetVccHi, U32, Void, )
OPCODE(SetScc, Void, U1, ) OPCODE(SetScc, Void, U1, )
OPCODE(SetExec, Void, U1, ) OPCODE(SetExec, Void, U1, )
OPCODE(SetVcc, Void, U1, ) OPCODE(SetVcc, Void, U1, )
OPCODE(SetSccLo, Void, U32, )
OPCODE(SetVccLo, Void, U32, ) OPCODE(SetVccLo, Void, U32, )
OPCODE(SetVccHi, Void, U32, ) OPCODE(SetVccHi, Void, U32, )
@ -216,6 +218,7 @@ OPCODE(FPIsInf64, U1, F64,
// Integer operations // Integer operations
OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd32, U32, U32, U32, )
OPCODE(IAdd64, U64, U64, U64, ) OPCODE(IAdd64, U64, U64, U64, )
OPCODE(IAddCary32, U32x2, U32, U32, )
OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub32, U32, U32, U32, )
OPCODE(ISub64, U64, U64, U64, ) OPCODE(ISub64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, ) OPCODE(IMul32, U32, U32, U32, )

View File

@ -138,7 +138,8 @@ public:
u32 Add(const BufferResource& desc) { u32 Add(const BufferResource& desc) {
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
return desc.sgpr_base == existing.sgpr_base && return desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset; desc.dword_offset == existing.dword_offset &&
desc.inline_cbuf == existing.inline_cbuf;
})}; })};
auto& buffer = buffer_resources[index]; auto& buffer = buffer_resources[index];
ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records); ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
@ -219,13 +220,55 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
}; };
} }
static constexpr size_t MaxUboSize = 65536;
// Tries to recognise a buffer instruction whose resource descriptor is assembled
// inline by the shader rather than loaded from user data.
//
// On a match, fills `cbuf` from the recovered constants, registers it with
// `descriptors` and returns the binding index from descriptors.Add. Returns -1 and
// leaves `cbuf` untouched when the pattern does not match.
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, AmdGpu::Buffer& cbuf) {
    /**
     * Match the following pattern
     * s_getpc_b64 s[32:33]
     * s_add_u32 s32, <const>, s32
     * s_addc_u32 s33, 0, s33
     * s_mov_b32 s35, <const>
     * s_movk_i32 s34, <const>
     * buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
     **/
    IR::Inst* handle = inst.Arg(0).InstRecursive();

    // Dword 0 must be an IAdd32 of two immediates. Both operands are read with
    // U32() below, so both have to be checked here; the original only checked the
    // first one.
    IR::Inst* p0 = handle->Arg(0).InstRecursive();
    if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
        !p0->Arg(1).IsImmediate()) {
        return -1;
    }

    // Dword 1 only needs to be produced by an IAdd32; its operands are not read.
    IR::Inst* p1 = handle->Arg(1).InstRecursive();
    if (p1->GetOpcode() != IR::Opcode::IAdd32) {
        return -1;
    }

    // Dwords 2 and 3 must be plain immediates.
    if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
        return -1;
    }

    // We have found this pattern. Build the sharp and assign a binding to it.
    cbuf.raw0 = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
    cbuf.num_records = handle->Arg(2).U32();
    cbuf.raw11 = handle->Arg(3).U32();
    return descriptors.Add(BufferResource{
        // There is no SGPR-backed sharp for this resource; the max value is a marker.
        .sgpr_base = std::numeric_limits<u32>::max(),
        .dword_offset = 0,
        .stride = cbuf.GetStride(),
        .num_records = u32(cbuf.num_records),
        .used_types = BufferDataType(inst),
        .inline_cbuf = cbuf,
        .is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
    });
}
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) { Descriptors& descriptors) {
static constexpr size_t MaxUboSize = 65536; s32 binding{};
IR::Inst* producer = inst.Arg(0).InstRecursive(); AmdGpu::Buffer buffer;
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* producer = handle->Arg(0).InstRecursive();
const auto sharp = TrackSharp(producer); const auto sharp = TrackSharp(producer);
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset); buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
const u32 binding = descriptors.Add(BufferResource{ binding = descriptors.Add(BufferResource{
.sgpr_base = sharp.sgpr_base, .sgpr_base = sharp.sgpr_base,
.dword_offset = sharp.dword_offset, .dword_offset = sharp.dword_offset,
.stride = buffer.GetStride(), .stride = buffer.GetStride(),
@ -233,6 +276,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
.used_types = BufferDataType(inst), .used_types = BufferDataType(inst),
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize, .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
}); });
}
const auto inst_info = inst.Flags<IR::BufferInstInfo>(); const auto inst_info = inst.Flags<IR::BufferInstInfo>();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
// Replace handle with binding index in buffer resource list. // Replace handle with binding index in buffer resource list.
@ -240,7 +285,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
if (inst_info.is_typed) { if (inst_info.is_typed) {
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float && ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32); (inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32));
} }
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer || if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) { inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {

View File

@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
struct ExecFlagTag : FlagTag {}; struct ExecFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {}; struct VccFlagTag : FlagTag {};
struct VccLoTag : FlagTag {}; struct VccLoTag : FlagTag {};
struct SccLoTag : FlagTag {};
struct VccHiTag : FlagTag {}; struct VccHiTag : FlagTag {};
struct GotoVariable : FlagTag { struct GotoVariable : FlagTag {
@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
}; };
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag, using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
VccFlagTag, VccLoTag, VccHiTag>; VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>; using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable { struct DefTable {
@ -83,6 +84,13 @@ struct DefTable {
exec_flag.insert_or_assign(block, value); exec_flag.insert_or_assign(block, value);
} }
// Returns the value recorded for SccLo in `block`.
// Uses operator[] on the map, so a block without an entry gets a
// default-constructed IR::Value inserted and returned.
const IR::Value& Def(IR::Block* block, SccLoTag) {
return scc_lo_flag[block];
}
// Records `value` as the SccLo definition for `block`, replacing any earlier entry.
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
scc_lo_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, VccLoTag) { const IR::Value& Def(IR::Block* block, VccLoTag) {
return vcc_lo_flag[block]; return vcc_lo_flag[block];
} }
@ -108,6 +116,7 @@ struct DefTable {
ValueMap scc_flag; ValueMap scc_flag;
ValueMap exec_flag; ValueMap exec_flag;
ValueMap vcc_flag; ValueMap vcc_flag;
ValueMap scc_lo_flag;
ValueMap vcc_lo_flag; ValueMap vcc_lo_flag;
ValueMap vcc_hi_flag; ValueMap vcc_hi_flag;
}; };
@ -124,6 +133,10 @@ IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
return IR::Opcode::UndefU32; return IR::Opcode::UndefU32;
} }
// Undef opcode associated with the SccLo variable: a 32-bit undef (UndefU32).
IR::Opcode UndefOpcode(const SccLoTag&) noexcept {
return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(const VccHiTag&) noexcept { IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
return IR::Opcode::UndefU32; return IR::Opcode::UndefU32;
} }
@ -321,6 +334,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::SetVcc: case IR::Opcode::SetVcc:
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0)); pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
break; break;
case IR::Opcode::SetSccLo:
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetVccLo: case IR::Opcode::SetVccLo:
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0)); pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
break; break;
@ -350,6 +366,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetVcc: case IR::Opcode::GetVcc:
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block)); inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
break; break;
case IR::Opcode::GetSccLo:
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
break;
case IR::Opcode::GetVccLo: case IR::Opcode::GetVccLo:
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block)); inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
break; break;

View File

@ -4,7 +4,6 @@
#pragma once #pragma once
#include <span> #include <span>
#include <vector>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "common/types.h" #include "common/types.h"
@ -69,15 +68,18 @@ enum class VsOutput : u32 {
}; };
using VsOutputMap = std::array<VsOutput, 4>; using VsOutputMap = std::array<VsOutput, 4>;
struct Info;
struct BufferResource { struct BufferResource {
u32 sgpr_base; u32 sgpr_base;
u32 dword_offset; u32 dword_offset;
u32 stride; u32 stride;
u32 num_records; u32 num_records;
IR::Type used_types; IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
bool is_storage; bool is_storage;
auto operator<=>(const BufferResource&) const = default; constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
}; };
using BufferResourceList = boost::container::static_vector<BufferResource, 16>; using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
@ -162,6 +164,7 @@ struct Info {
std::span<const u32> user_data; std::span<const u32> user_data;
Stage stage; Stage stage;
uintptr_t pgm_base{};
u32 shared_memory_size{}; u32 shared_memory_size{};
bool uses_group_quad{}; bool uses_group_quad{};
bool uses_shared_u8{}; bool uses_shared_u8{};
@ -180,6 +183,10 @@ struct Info {
} }
}; };
// Resolves the buffer descriptor for this resource.
// When inline_cbuf converts to true it is returned as-is; otherwise the descriptor
// is read from `info` via ReadUd at (sgpr_base, dword_offset).
constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept {
    if (inline_cbuf) {
        return inline_cbuf;
    }
    return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
} // namespace Shader } // namespace Shader
template <> template <>

View File

@ -85,14 +85,14 @@ struct Liverpool {
} settings; } settings;
UserData user_data; UserData user_data;
template <typename T = u8> template <typename T = u8*>
const T* Address() const { const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8; const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
return reinterpret_cast<const T*>(addr); return reinterpret_cast<const T>(addr);
} }
std::span<const u32> Code() const { std::span<const u32> Code() const {
const u32* code = Address<u32>(); const u32* code = Address<u32*>();
BinaryInfo bininfo; BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32); const u32 num_dwords = bininfo.length / sizeof(u32);
@ -128,10 +128,10 @@ struct Liverpool {
INSERT_PADDING_WORDS(0x2A); INSERT_PADDING_WORDS(0x2A);
UserData user_data; UserData user_data;
template <typename T = u8> template <typename T = u8*>
const T* Address() const { const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8; const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
return reinterpret_cast<const T*>(addr); return reinterpret_cast<const T>(addr);
} }
u32 SharedMemSize() const noexcept { u32 SharedMemSize() const noexcept {
@ -140,7 +140,7 @@ struct Liverpool {
} }
std::span<const u32> Code() const { std::span<const u32> Code() const {
const u32* code = Address<u32>(); const u32* code = Address<u32*>();
BinaryInfo bininfo; BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32); const u32 num_dwords = bininfo.length / sizeof(u32);
@ -150,7 +150,7 @@ struct Liverpool {
template <typename Shader> template <typename Shader>
static constexpr auto* GetBinaryInfo(const Shader& sh) { static constexpr auto* GetBinaryInfo(const Shader& sh) {
const auto* code = sh.template Address<u32>(); const auto* code = sh.template Address<u32*>();
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2); const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header"); ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
return bininfo; return bininfo;

View File

@ -22,6 +22,7 @@ enum class CompSwizzle : u32 {
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
struct Buffer { struct Buffer {
union { union {
u64 raw0;
BitField<0, 44, u64> base_address; BitField<0, 44, u64> base_address;
BitField<48, 14, u64> stride; BitField<48, 14, u64> stride;
BitField<62, 1, u64> cache_swizzle; BitField<62, 1, u64> cache_swizzle;
@ -29,6 +30,7 @@ struct Buffer {
}; };
u32 num_records; u32 num_records;
union { union {
u32 raw11;
BitField<0, 3, u32> dst_sel_x; BitField<0, 3, u32> dst_sel_x;
BitField<3, 3, u32> dst_sel_y; BitField<3, 3, u32> dst_sel_y;
BitField<6, 3, u32> dst_sel_z; BitField<6, 3, u32> dst_sel_z;
@ -41,6 +43,14 @@ struct Buffer {
BitField<23, 1, u32> add_tid_enable; BitField<23, 1, u32> add_tid_enable;
}; };
// True when the base_address field is non-zero.
// NOTE(review): this conversion is not `explicit`, so a Buffer converts to bool
// (and from there to integers) implicitly — confirm that is intended.
operator bool() const noexcept {
return base_address != 0;
}
// Byte-wise equality: compares all sizeof(Buffer) bytes of both objects with
// std::memcmp.
bool operator==(const Buffer& other) const noexcept {
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
}
CompSwizzle GetSwizzle(u32 comp) const noexcept { CompSwizzle GetSwizzle(u32 comp) const noexcept {
return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7); return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
} }

View File

@ -91,7 +91,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
u32 binding{}; u32 binding{};
for (const auto& buffer : info.buffers) { for (const auto& buffer : info.buffers) {
const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset); const auto vsharp = buffer.GetVsharp(info);
const u32 size = vsharp.GetSize(); const u32 size = vsharp.GetSize();
const VAddr address = vsharp.base_address.Value(); const VAddr address = vsharp.base_address.Value();
texture_cache.OnCpuWrite(address); texture_cache.OnCpuWrite(address);

View File

@ -326,7 +326,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& stage : stages) { for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) { for (const auto& buffer : stage.buffers) {
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset); const auto vsharp = buffer.GetVsharp(stage);
const VAddr address = vsharp.base_address.Value(); const VAddr address = vsharp.base_address.Value();
const u32 size = vsharp.GetSize(); const u32 size = vsharp.GetSize();
const u32 offset = staging.Copy(address, size, const u32 offset = staging.Copy(address, size,

View File

@ -198,7 +198,7 @@ void PipelineCache::RefreshGraphicsKey() {
for (u32 i = 0; i < MaxShaderStages; i++) { for (u32 i = 0; i < MaxShaderStages; i++) {
auto* pgm = regs.ProgramForStage(i); auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) { if (!pgm || !pgm->Address<u32*>()) {
key.stage_hashes[i] = 0; key.stage_hashes[i] = 0;
continue; continue;
} }
@ -248,17 +248,14 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
DumpShader(code, hash, stage, "bin"); DumpShader(code, hash, stage, "bin");
} }
if (hash == 0xcafe3773 || hash == 0xc6602df2) {
return nullptr;
}
block_pool.ReleaseContents(); block_pool.ReleaseContents();
inst_pool.ReleaseContents(); inst_pool.ReleaseContents();
// Recompile shader to IR. // Recompile shader to IR.
try { try {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash); LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
info.pgm_base = pgm->Address<uintptr_t>();
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V // Compile IR to SPIR-V
@ -296,8 +293,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
// Recompile shader to IR. // Recompile shader to IR.
try { try {
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key); LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
const Shader::Info info = Shader::Info info =
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs); MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
info.pgm_base = cs_pgm.Address<uintptr_t>();
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V // Compile IR to SPIR-V