shader_recompiler: Inline constant buffer impl
This commit is contained in:
parent
f212f43e18
commit
63801cfa35
|
@ -288,6 +288,10 @@ void EmitGetVcc(EmitContext& ctx) {
|
|||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetSccLo(EmitContext& ctx) {
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetVccLo(EmitContext& ctx) {
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
@ -308,6 +312,10 @@ void EmitSetVcc(EmitContext& ctx) {
|
|||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetSccLo(EmitContext& ctx) {
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitSetVccLo(EmitContext& ctx) {
|
||||
UNREACHABLE_MSG("Unreachable instruction");
|
||||
}
|
||||
|
|
|
@ -33,11 +33,13 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
|||
void EmitGetScc(EmitContext& ctx);
|
||||
void EmitGetExec(EmitContext& ctx);
|
||||
void EmitGetVcc(EmitContext& ctx);
|
||||
void EmitGetSccLo(EmitContext& ctx);
|
||||
void EmitGetVccLo(EmitContext& ctx);
|
||||
void EmitGetVccHi(EmitContext& ctx);
|
||||
void EmitSetScc(EmitContext& ctx);
|
||||
void EmitSetExec(EmitContext& ctx);
|
||||
void EmitSetVcc(EmitContext& ctx);
|
||||
void EmitSetSccLo(EmitContext& ctx);
|
||||
void EmitSetVccLo(EmitContext& ctx);
|
||||
void EmitSetVccHi(EmitContext& ctx);
|
||||
void EmitPrologue(EmitContext& ctx);
|
||||
|
@ -245,6 +247,7 @@ Id EmitFPIsInf32(EmitContext& ctx, Id value);
|
|||
Id EmitFPIsInf64(EmitContext& ctx, Id value);
|
||||
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
|
||||
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitISub32(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitISub64(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
|
||||
|
|
|
@ -60,6 +60,10 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
|
|||
return ctx.OpIAdd(ctx.U64, a, b);
|
||||
}
|
||||
|
||||
Id EmitIAddCary32(EmitContext& ctx, Id a, Id b) {
|
||||
return ctx.OpIAddCarry(ctx.full_result_u32x2, a, b);
|
||||
}
|
||||
|
||||
Id EmitISub32(EmitContext& ctx, Id a, Id b) {
|
||||
return ctx.OpISub(ctx.U32[1], a, b);
|
||||
}
|
||||
|
|
|
@ -633,7 +633,7 @@ private:
|
|||
if (!stmt.block->is_dummy) {
|
||||
const u32 start = stmt.block->begin_index;
|
||||
const u32 size = stmt.block->end_index - start + 1;
|
||||
Translate(current_block, inst_list.subspan(start, size), info);
|
||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), info);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -318,4 +318,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
|
|||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||
// This only really exists to let resource tracking pass know
|
||||
// there is an inline cbuf.
|
||||
SetDst(inst.dst[0], ir.Imm32(pc));
|
||||
}
|
||||
|
||||
void Translator::S_ADDC_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -30,7 +30,8 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
|||
}
|
||||
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
|
||||
}();
|
||||
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
||||
const IR::Value vsharp = ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
|
||||
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
|
||||
IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
|
||||
|
|
|
@ -236,7 +236,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
|
||||
if (inst_list.empty()) {
|
||||
return;
|
||||
}
|
||||
|
@ -833,6 +833,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_ADD_U32:
|
||||
translator.S_ADD_U32(inst);
|
||||
break;
|
||||
case Opcode::S_ADDC_U32:
|
||||
translator.S_ADDC_U32(inst);
|
||||
break;
|
||||
case Opcode::S_SUB_U32:
|
||||
case Opcode::S_SUB_I32:
|
||||
translator.S_SUB_U32(inst);
|
||||
|
@ -878,6 +881,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_READFIRSTLANE_B32:
|
||||
translator.V_READFIRSTLANE_B32(inst);
|
||||
break;
|
||||
case Opcode::S_GETPC_B64:
|
||||
translator.S_GETPC_B64(block_base, inst);
|
||||
break;
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
|
@ -895,6 +901,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
magic_enum::enum_name(inst.opcode), opcode);
|
||||
info.translation_failed = true;
|
||||
}
|
||||
block_base += inst.length;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -80,6 +80,8 @@ public:
|
|||
void S_BREV_B32(const GcnInst& inst);
|
||||
void S_ADD_U32(const GcnInst& inst);
|
||||
void S_SUB_U32(const GcnInst& inst);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_ADDC_U32(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
@ -192,6 +194,6 @@ private:
|
|||
static std::array<bool, IR::NumScalarRegs> exec_contexts;
|
||||
};
|
||||
|
||||
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -250,7 +250,9 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
|
|||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
}
|
||||
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
|
||||
const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::F32{value});
|
||||
|
@ -309,7 +311,9 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
|
|||
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
|
||||
break;
|
||||
}
|
||||
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
|
||||
const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, info);
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
||||
|
|
|
@ -212,6 +212,10 @@ U1 IREmitter::GetVcc() {
|
|||
return Inst<U1>(Opcode::GetVcc);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetSccLo() {
|
||||
return Inst<U32>(Opcode::GetSccLo);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetVccLo() {
|
||||
return Inst<U32>(Opcode::GetVccLo);
|
||||
}
|
||||
|
@ -232,6 +236,10 @@ void IREmitter::SetVcc(const U1& value) {
|
|||
Inst(Opcode::SetVcc, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetSccLo(const U32& value) {
|
||||
Inst(Opcode::SetSccLo, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetVccLo(const U32& value) {
|
||||
Inst(Opcode::SetVccLo, value);
|
||||
}
|
||||
|
@ -898,6 +906,18 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
|
|||
}
|
||||
}
|
||||
|
||||
Value IREmitter::IAddCary(const U32& a, const U32& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
}
|
||||
switch (a.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::IAddCary32, a, b);
|
||||
default:
|
||||
ThrowInvalidType(a.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
|
|
|
@ -64,11 +64,13 @@ public:
|
|||
[[nodiscard]] U1 GetScc();
|
||||
[[nodiscard]] U1 GetExec();
|
||||
[[nodiscard]] U1 GetVcc();
|
||||
[[nodiscard]] U32 GetSccLo();
|
||||
[[nodiscard]] U32 GetVccLo();
|
||||
[[nodiscard]] U32 GetVccHi();
|
||||
void SetScc(const U1& value);
|
||||
void SetExec(const U1& value);
|
||||
void SetVcc(const U1& value);
|
||||
void SetSccLo(const U32& value);
|
||||
void SetVccLo(const U32& value);
|
||||
void SetVccHi(const U32& value);
|
||||
|
||||
|
@ -151,8 +153,9 @@ public:
|
|||
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
|
||||
|
||||
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
|
||||
[[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
|
||||
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
|
||||
[[nodiscard]] U32U64 INeg(const U32U64& value);
|
||||
|
|
|
@ -56,11 +56,13 @@ OPCODE(SetAttribute, Void, Attr
|
|||
OPCODE(GetScc, U1, Void, )
|
||||
OPCODE(GetExec, U1, Void, )
|
||||
OPCODE(GetVcc, U1, Void, )
|
||||
OPCODE(GetSccLo, U32, Void, )
|
||||
OPCODE(GetVccLo, U32, Void, )
|
||||
OPCODE(GetVccHi, U32, Void, )
|
||||
OPCODE(SetScc, Void, U1, )
|
||||
OPCODE(SetExec, Void, U1, )
|
||||
OPCODE(SetVcc, Void, U1, )
|
||||
OPCODE(SetSccLo, Void, U32, )
|
||||
OPCODE(SetVccLo, Void, U32, )
|
||||
OPCODE(SetVccHi, Void, U32, )
|
||||
|
||||
|
@ -216,6 +218,7 @@ OPCODE(FPIsInf64, U1, F64,
|
|||
// Integer operations
|
||||
OPCODE(IAdd32, U32, U32, U32, )
|
||||
OPCODE(IAdd64, U64, U64, U64, )
|
||||
OPCODE(IAddCary32, U32x2, U32, U32, )
|
||||
OPCODE(ISub32, U32, U32, U32, )
|
||||
OPCODE(ISub64, U64, U64, U64, )
|
||||
OPCODE(IMul32, U32, U32, U32, )
|
||||
|
|
|
@ -138,7 +138,8 @@ public:
|
|||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
desc.dword_offset == existing.dword_offset &&
|
||||
desc.inline_cbuf == existing.inline_cbuf;
|
||||
})};
|
||||
auto& buffer = buffer_resources[index];
|
||||
ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
|
||||
|
@ -219,20 +220,64 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
};
|
||||
}
|
||||
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
|
||||
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, AmdGpu::Buffer& cbuf) {
|
||||
/**
|
||||
* Assert for the following pattern
|
||||
* s_getpc_b64 s[32:33]
|
||||
* s_add_u32 s32, <const>, s32
|
||||
* s_addc_u32 s33, 0, s33
|
||||
* s_mov_b32 s35, <const>
|
||||
* s_movk_i32 s34, <const>
|
||||
* buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
|
||||
**/
|
||||
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
IR::Inst* p0 = handle->Arg(0).InstRecursive();
|
||||
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate()) {
|
||||
return -1;
|
||||
}
|
||||
IR::Inst* p1 = handle->Arg(1).InstRecursive();
|
||||
if (p1->GetOpcode() != IR::Opcode::IAdd32) {
|
||||
return -1;
|
||||
}
|
||||
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
|
||||
return -1;
|
||||
}
|
||||
// We have found this pattern. Build the sharp and assign a binding to it.
|
||||
cbuf.raw0 = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
|
||||
cbuf.num_records = handle->Arg(2).U32();
|
||||
cbuf.raw11 = handle->Arg(3).U32();
|
||||
return descriptors.Add(BufferResource{
|
||||
.sgpr_base = std::numeric_limits<u32>::max(),
|
||||
.dword_offset = 0,
|
||||
.stride = cbuf.GetStride(),
|
||||
.num_records = u32(cbuf.num_records),
|
||||
.used_types = BufferDataType(inst),
|
||||
.inline_cbuf = cbuf,
|
||||
.is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
|
||||
});
|
||||
}
|
||||
|
||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer);
|
||||
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.stride = buffer.GetStride(),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferDataType(inst),
|
||||
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
|
||||
});
|
||||
s32 binding{};
|
||||
AmdGpu::Buffer buffer;
|
||||
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
|
||||
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer);
|
||||
buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
||||
binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.stride = buffer.GetStride(),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferDataType(inst),
|
||||
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
|
||||
});
|
||||
}
|
||||
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
|
@ -240,7 +285,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
if (inst_info.is_typed) {
|
||||
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
|
||||
(inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32));
|
||||
}
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
|
||||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
|
||||
|
|
|
@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
|
|||
struct ExecFlagTag : FlagTag {};
|
||||
struct VccFlagTag : FlagTag {};
|
||||
struct VccLoTag : FlagTag {};
|
||||
struct SccLoTag : FlagTag {};
|
||||
struct VccHiTag : FlagTag {};
|
||||
|
||||
struct GotoVariable : FlagTag {
|
||||
|
@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
|
|||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||
VccFlagTag, VccLoTag, VccHiTag>;
|
||||
VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
|
@ -83,6 +84,13 @@ struct DefTable {
|
|||
exec_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, SccLoTag) {
|
||||
return scc_lo_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
|
||||
scc_lo_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, VccLoTag) {
|
||||
return vcc_lo_flag[block];
|
||||
}
|
||||
|
@ -108,6 +116,7 @@ struct DefTable {
|
|||
ValueMap scc_flag;
|
||||
ValueMap exec_flag;
|
||||
ValueMap vcc_flag;
|
||||
ValueMap scc_lo_flag;
|
||||
ValueMap vcc_lo_flag;
|
||||
ValueMap vcc_hi_flag;
|
||||
};
|
||||
|
@ -124,6 +133,10 @@ IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
|
|||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
IR::Opcode UndefOpcode(const SccLoTag&) noexcept {
|
||||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
||||
IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
|
||||
return IR::Opcode::UndefU32;
|
||||
}
|
||||
|
@ -321,6 +334,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::SetVcc:
|
||||
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetSccLo:
|
||||
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetVccLo:
|
||||
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
|
@ -350,6 +366,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::GetVcc:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetSccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetVccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
||||
break;
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
@ -69,15 +68,18 @@ enum class VsOutput : u32 {
|
|||
};
|
||||
using VsOutputMap = std::array<VsOutput, 4>;
|
||||
|
||||
struct Info;
|
||||
|
||||
struct BufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
u32 stride;
|
||||
u32 num_records;
|
||||
IR::Type used_types;
|
||||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_storage;
|
||||
|
||||
auto operator<=>(const BufferResource&) const = default;
|
||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||
};
|
||||
using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
|
||||
|
||||
|
@ -162,6 +164,7 @@ struct Info {
|
|||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
|
||||
uintptr_t pgm_base{};
|
||||
u32 shared_memory_size{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_shared_u8{};
|
||||
|
@ -180,6 +183,10 @@ struct Info {
|
|||
}
|
||||
};
|
||||
|
||||
constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept {
|
||||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
template <>
|
||||
|
|
|
@ -85,14 +85,14 @@ struct Liverpool {
|
|||
} settings;
|
||||
UserData user_data;
|
||||
|
||||
template <typename T = u8>
|
||||
const T* Address() const {
|
||||
template <typename T = u8*>
|
||||
const T Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const T*>(addr);
|
||||
return reinterpret_cast<const T>(addr);
|
||||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
const u32* code = Address<u32>();
|
||||
const u32* code = Address<u32*>();
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
|
@ -128,10 +128,10 @@ struct Liverpool {
|
|||
INSERT_PADDING_WORDS(0x2A);
|
||||
UserData user_data;
|
||||
|
||||
template <typename T = u8>
|
||||
const T* Address() const {
|
||||
template <typename T = u8*>
|
||||
const T Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const T*>(addr);
|
||||
return reinterpret_cast<const T>(addr);
|
||||
}
|
||||
|
||||
u32 SharedMemSize() const noexcept {
|
||||
|
@ -140,7 +140,7 @@ struct Liverpool {
|
|||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
const u32* code = Address<u32>();
|
||||
const u32* code = Address<u32*>();
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
|
@ -150,7 +150,7 @@ struct Liverpool {
|
|||
|
||||
template <typename Shader>
|
||||
static constexpr auto* GetBinaryInfo(const Shader& sh) {
|
||||
const auto* code = sh.template Address<u32>();
|
||||
const auto* code = sh.template Address<u32*>();
|
||||
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
|
||||
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
|
||||
return bininfo;
|
||||
|
|
|
@ -22,6 +22,7 @@ enum class CompSwizzle : u32 {
|
|||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
union {
|
||||
u64 raw0;
|
||||
BitField<0, 44, u64> base_address;
|
||||
BitField<48, 14, u64> stride;
|
||||
BitField<62, 1, u64> cache_swizzle;
|
||||
|
@ -29,6 +30,7 @@ struct Buffer {
|
|||
};
|
||||
u32 num_records;
|
||||
union {
|
||||
u32 raw11;
|
||||
BitField<0, 3, u32> dst_sel_x;
|
||||
BitField<3, 3, u32> dst_sel_y;
|
||||
BitField<6, 3, u32> dst_sel_z;
|
||||
|
@ -41,6 +43,14 @@ struct Buffer {
|
|||
BitField<23, 1, u32> add_tid_enable;
|
||||
};
|
||||
|
||||
operator bool() const noexcept {
|
||||
return base_address != 0;
|
||||
}
|
||||
|
||||
bool operator==(const Buffer& other) const noexcept {
|
||||
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
|
||||
}
|
||||
|
||||
CompSwizzle GetSwizzle(u32 comp) const noexcept {
|
||||
return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
|
||||
}
|
||||
|
|
|
@ -91,7 +91,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
|||
u32 binding{};
|
||||
|
||||
for (const auto& buffer : info.buffers) {
|
||||
const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||
const auto vsharp = buffer.GetVsharp(info);
|
||||
const u32 size = vsharp.GetSize();
|
||||
const VAddr address = vsharp.base_address.Value();
|
||||
texture_cache.OnCpuWrite(address);
|
||||
|
|
|
@ -326,7 +326,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
|
||||
for (const auto& stage : stages) {
|
||||
for (const auto& buffer : stage.buffers) {
|
||||
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||
const auto vsharp = buffer.GetVsharp(stage);
|
||||
const VAddr address = vsharp.base_address.Value();
|
||||
const u32 size = vsharp.GetSize();
|
||||
const u32 offset = staging.Copy(address, size,
|
||||
|
|
|
@ -198,7 +198,7 @@ void PipelineCache::RefreshGraphicsKey() {
|
|||
|
||||
for (u32 i = 0; i < MaxShaderStages; i++) {
|
||||
auto* pgm = regs.ProgramForStage(i);
|
||||
if (!pgm || !pgm->Address<u32>()) {
|
||||
if (!pgm || !pgm->Address<u32*>()) {
|
||||
key.stage_hashes[i] = 0;
|
||||
continue;
|
||||
}
|
||||
|
@ -248,17 +248,14 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
DumpShader(code, hash, stage, "bin");
|
||||
}
|
||||
|
||||
if (hash == 0xcafe3773 || hash == 0xc6602df2) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
|
||||
// Recompile shader to IR.
|
||||
try {
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
|
||||
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
info.pgm_base = pgm->Address<uintptr_t>();
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
|
@ -296,8 +293,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
// Recompile shader to IR.
|
||||
try {
|
||||
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
|
||||
const Shader::Info info =
|
||||
Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
|
|
Loading…
Reference in New Issue