shader_recompiler: Inline constant buffer impl

2024-07-03 05:43:36 +03:00 · 2024-07-03 05:43:36 +03:00 · 63801cfa35
parent f212f43e18
commit 63801cfa35
20 changed files with 189 additions and 42 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@ -288,6 +288,10 @@ void EmitGetVcc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
 }

+// Backend handler for the GetSccLo IR opcode. It emits no SPIR-V; reaching it
+// only triggers UNREACHABLE_MSG.
+// NOTE(review): presumably every GetSccLo is expected to be rewritten away by
+// an earlier IR pass before emission — confirm.
+void EmitGetSccLo(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
 void EmitGetVccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
 }
@ -308,6 +312,10 @@ void EmitSetVcc(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
 }

+// Backend handler for the SetSccLo IR opcode. It emits no SPIR-V; reaching it
+// only triggers UNREACHABLE_MSG.
+// NOTE(review): presumably every SetSccLo is expected to be consumed by an
+// earlier IR pass before emission — confirm.
+void EmitSetSccLo(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
 void EmitSetVccLo(EmitContext& ctx) {
    UNREACHABLE_MSG("Unreachable instruction");
 }
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@ -33,11 +33,13 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx);
 void EmitGetScc(EmitContext& ctx);
 void EmitGetExec(EmitContext& ctx);
 void EmitGetVcc(EmitContext& ctx);
+void EmitGetSccLo(EmitContext& ctx);
 void EmitGetVccLo(EmitContext& ctx);
 void EmitGetVccHi(EmitContext& ctx);
 void EmitSetScc(EmitContext& ctx);
 void EmitSetExec(EmitContext& ctx);
 void EmitSetVcc(EmitContext& ctx);
+void EmitSetSccLo(EmitContext& ctx);
 void EmitSetVccLo(EmitContext& ctx);
 void EmitSetVccHi(EmitContext& ctx);
 void EmitPrologue(EmitContext& ctx);
@ -245,6 +247,7 @@ Id EmitFPIsInf32(EmitContext& ctx, Id value);
 Id EmitFPIsInf64(EmitContext& ctx, Id value);
 Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
+Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
 Id EmitISub32(EmitContext& ctx, Id a, Id b);
 Id EmitISub64(EmitContext& ctx, Id a, Id b);
 Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@ -60,6 +60,10 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
    return ctx.OpIAdd(ctx.U64, a, b);
 }

+// Emits OpIAddCarry over a and b, typed with the context's full_result_u32x2,
+// and hands back the id of the emitted instruction.
+Id EmitIAddCary32(EmitContext& ctx, Id a, Id b) {
+    const auto result_type = ctx.full_result_u32x2;
+    return ctx.OpIAddCarry(result_type, a, b);
+}
+
 Id EmitISub32(EmitContext& ctx, Id a, Id b) {
    return ctx.OpISub(ctx.U32[1], a, b);
 }
--- a/src/shader_recompiler/frontend/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/structured_control_flow.cpp
@ -633,7 +633,7 @@ private:
                if (!stmt.block->is_dummy) {
                    const u32 start = stmt.block->begin_index;
                    const u32 size = stmt.block->end_index - start + 1;
-                    Translate(current_block, inst_list.subspan(start, size), info);
+                    Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), info);
                }
                break;
            }
--- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
@ -318,4 +318,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
    ir.SetScc(ir.Imm1(false));
 }

+void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
+    // This only really exists to let the resource tracking pass know
+    // there is an inline cbuf.
+    //
+    // `pc` is the offset of this instruction itself: the translate loop advances
+    // its running offset only after dispatching an instruction. S_GETPC_B64
+    // yields the address of the *following* instruction, so step over our own
+    // encoding before recording the value.
+    // NOTE(review): only inst.dst[0] is written with a 32-bit immediate; the
+    // upper half of the 64-bit destination is not set here — confirm intended.
+    SetDst(inst.dst[0], ir.Imm32(pc + inst.length));
+}
+
+void Translator::S_ADDC_U32(const GcnInst& inst) {
+    // dst = src0 + src1 + carry-in, where the carry-in is read via GetSccLo.
+    // Each IR emission gets its own statement so the instruction order in the
+    // block does not depend on the compiler's argument evaluation order.
+    // NOTE(review): SCC is not written back here, so no carry-out is produced —
+    // confirm whether callers rely on it.
+    const IR::U32 lhs{GetSrc(inst.src[0])};
+    const IR::U32 rhs{GetSrc(inst.src[1])};
+    const auto partial_sum = ir.IAdd(lhs, rhs);
+    const auto carry_in = ir.GetSccLo();
+    SetDst(inst.dst[0], ir.IAdd(partial_sum, carry_in));
+}
+
 } // namespace Shader::Gcn
--- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
@ -30,7 +30,8 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
        }
        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
    }();
-    const IR::Value vsharp = ir.GetScalarReg(sbase);
+    const IR::Value vsharp = ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
+                                                   ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
    IR::ScalarReg dst_reg{inst.dst[0].code};
    for (u32 i = 0; i < num_dwords; i++) {
        const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@ -236,7 +236,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
    }
 }

-void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
+void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
    if (inst_list.empty()) {
        return;
    }
@ -833,6 +833,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
        case Opcode::S_ADD_U32:
            translator.S_ADD_U32(inst);
            break;
+        case Opcode::S_ADDC_U32:
+            translator.S_ADDC_U32(inst);
+            break;
        case Opcode::S_SUB_U32:
        case Opcode::S_SUB_I32:
            translator.S_SUB_U32(inst);
@ -878,6 +881,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
        case Opcode::V_READFIRSTLANE_B32:
            translator.V_READFIRSTLANE_B32(inst);
            break;
+        case Opcode::S_GETPC_B64:
+            translator.S_GETPC_B64(block_base, inst);
+            break;
        case Opcode::S_NOP:
        case Opcode::S_CBRANCH_EXECZ:
        case Opcode::S_CBRANCH_SCC0:
@ -895,6 +901,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
                      magic_enum::enum_name(inst.opcode), opcode);
            info.translation_failed = true;
        }
+        block_base += inst.length;
    }
 }

--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@ -80,6 +80,8 @@ public:
    void S_BREV_B32(const GcnInst& inst);
    void S_ADD_U32(const GcnInst& inst);
    void S_SUB_U32(const GcnInst& inst);
+    void S_GETPC_B64(u32 pc, const GcnInst& inst);
+    void S_ADDC_U32(const GcnInst& inst);

    // Scalar Memory
    void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -192,6 +194,6 @@ private:
    static std::array<bool, IR::NumScalarRegs> exec_contexts;
 };

-void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
+void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);

 } // namespace Shader::Gcn
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@ -250,7 +250,9 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
        info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
    }

-    const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
+    const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
+                                                   ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
+    const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
    const IR::VectorReg dst_reg{inst.src[1].code};
    if (num_dwords == 1) {
        ir.SetVectorReg(dst_reg, IR::F32{value});
@ -309,7 +311,9 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
                                      ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
        break;
    }
-    ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
+    const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
+                                                   ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
+    ir.StoreBuffer(num_dwords, handle, address, value, info);
 }

 void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@ -212,6 +212,10 @@ U1 IREmitter::GetVcc() {
    return Inst<U1>(Opcode::GetVcc);
 }

+// Emits a GetSccLo instruction and returns its result typed as U32.
+U32 IREmitter::GetSccLo() {
+    return Inst<U32>(Opcode::GetSccLo);
+}
+
 U32 IREmitter::GetVccLo() {
    return Inst<U32>(Opcode::GetVccLo);
 }
@ -232,6 +236,10 @@ void IREmitter::SetVcc(const U1& value) {
    Inst(Opcode::SetVcc, value);
 }

+// Emits a SetSccLo instruction taking `value` as its single operand.
+void IREmitter::SetSccLo(const U32& value) {
+    Inst(Opcode::SetSccLo, value);
+}
+
 void IREmitter::SetVccLo(const U32& value) {
    Inst(Opcode::SetVccLo, value);
 }
@ -898,6 +906,18 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
    }
 }

+// Emits an IAddCary32 instruction over two 32-bit operands.
+// The opcode table declares IAddCary32 with a U32x2 result, so the instruction
+// is returned as an untyped Value instead of being wrapped as a U32.
+// NOTE(review): which component holds the sum and which the carry is decided by
+// the backend's OpIAddCarry lowering — confirm before extracting.
+Value IREmitter::IAddCary(const U32& a, const U32& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst(Opcode::IAddCary32, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
    if (a.Type() != b.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@ -64,11 +64,13 @@ public:
    [[nodiscard]] U1 GetScc();
    [[nodiscard]] U1 GetExec();
    [[nodiscard]] U1 GetVcc();
+    [[nodiscard]] U32 GetSccLo();
    [[nodiscard]] U32 GetVccLo();
    [[nodiscard]] U32 GetVccHi();
    void SetScc(const U1& value);
    void SetExec(const U1& value);
    void SetVcc(const U1& value);
+    void SetSccLo(const U32& value);
    void SetVccLo(const U32& value);
    void SetVccHi(const U32& value);

@ -151,8 +153,9 @@ public:
    [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);

    [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+    [[nodiscard]] Value IAddCary(const U32& a, const U32& b);
    [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
-    [[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
+    [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
    [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
    [[nodiscard]] U32U64 INeg(const U32U64& value);
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@ -56,11 +56,13 @@ OPCODE(SetAttribute,                                        Void,           Attr
 OPCODE(GetScc,                                             U1,             Void,                                                                            )
 OPCODE(GetExec,                                            U1,             Void,                                                                            )
 OPCODE(GetVcc,                                             U1,             Void,                                                                            )
+OPCODE(GetSccLo,                                           U32,            Void,                                                                            )
 OPCODE(GetVccLo,                                           U32,            Void,                                                                            )
 OPCODE(GetVccHi,                                           U32,            Void,                                                                            )
 OPCODE(SetScc,                                             Void,           U1,                                                                              )
 OPCODE(SetExec,                                            Void,           U1,                                                                              )
 OPCODE(SetVcc,                                             Void,           U1,                                                                              )
+OPCODE(SetSccLo,                                           Void,           U32,                                                                             )
 OPCODE(SetVccLo,                                           Void,           U32,                                                                             )
 OPCODE(SetVccHi,                                           Void,           U32,                                                                             )

@ -216,6 +218,7 @@ OPCODE(FPIsInf64,                                           U1,             F64,
 // Integer operations
 OPCODE(IAdd32,                                              U32,            U32,            U32,                                                            )
 OPCODE(IAdd64,                                              U64,            U64,            U64,                                                            )
+OPCODE(IAddCary32,                                          U32x2,          U32,            U32,                                                            )
 OPCODE(ISub32,                                              U32,            U32,            U32,                                                            )
 OPCODE(ISub64,                                              U64,            U64,            U64,                                                            )
 OPCODE(IMul32,                                              U32,            U32,            U32,                                                            )
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -138,7 +138,8 @@ public:
    u32 Add(const BufferResource& desc) {
        const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
            return desc.sgpr_base == existing.sgpr_base &&
-                   desc.dword_offset == existing.dword_offset;
+                   desc.dword_offset == existing.dword_offset &&
+                   desc.inline_cbuf == existing.inline_cbuf;
        })};
        auto& buffer = buffer_resources[index];
        ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
@ -219,13 +220,55 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
    };
 }

+static constexpr size_t MaxUboSize = 65536;
+
+// Tries to recognise a buffer handle that the shader assembled inline from the
+// program counter and immediates rather than loading it from user data.
+// On a match the descriptor is written to `cbuf`, registered with `descriptors`
+// and the resulting binding index is returned. Otherwise returns -1 and leaves
+// `cbuf` untouched.
+s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, AmdGpu::Buffer& cbuf) {
+    /**
+     * Match the following pattern
+     * s_getpc_b64     s[32:33]
+     * s_add_u32       s32, <const>, s32
+     * s_addc_u32      s33, 0, s33
+     * s_mov_b32       s35, <const>
+     * s_movk_i32      s34, <const>
+     * buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
+     **/
+    IR::Inst* handle = inst.Arg(0).InstRecursive();
+    // Dwords 2 and 3 of the descriptor must be compile-time constants.
+    if (!handle->Arg(2).IsImmediate() || !handle->Arg(3).IsImmediate()) {
+        return -1;
+    }
+    // Dwords 0 and 1 must be produced by instructions; an immediate there has no
+    // producer to inspect, so it cannot be this pattern.
+    if (handle->Arg(0).IsImmediate() || handle->Arg(1).IsImmediate()) {
+        return -1;
+    }
+    IR::Inst* p0 = handle->Arg(0).InstRecursive();
+    // Both addends are read as immediates below, so both must actually be
+    // immediates before their values are used to build the address.
+    if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
+        !p0->Arg(1).IsImmediate()) {
+        return -1;
+    }
+    IR::Inst* p1 = handle->Arg(1).InstRecursive();
+    if (p1->GetOpcode() != IR::Opcode::IAdd32) {
+        return -1;
+    }
+    // We have found this pattern. Build the sharp and assign a binding to it.
+    cbuf.raw0 = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
+    cbuf.num_records = handle->Arg(2).U32();
+    cbuf.raw11 = handle->Arg(3).U32();
+    return descriptors.Add(BufferResource{
+        // No user-data slot backs this buffer; the descriptor travels in inline_cbuf.
+        .sgpr_base = std::numeric_limits<u32>::max(),
+        .dword_offset = 0,
+        .stride = cbuf.GetStride(),
+        .num_records = u32(cbuf.num_records),
+        .used_types = BufferDataType(inst),
+        .inline_cbuf = cbuf,
+        .is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
+    });
+}
+
 void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                            Descriptors& descriptors) {
-    static constexpr size_t MaxUboSize = 65536;
-    IR::Inst* producer = inst.Arg(0).InstRecursive();
+    s32 binding{};
+    AmdGpu::Buffer buffer;
+    if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
+        IR::Inst* handle = inst.Arg(0).InstRecursive();
+        IR::Inst* producer = handle->Arg(0).InstRecursive();
        const auto sharp = TrackSharp(producer);
-    const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
-    const u32 binding = descriptors.Add(BufferResource{
+        buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
+        binding = descriptors.Add(BufferResource{
            .sgpr_base = sharp.sgpr_base,
            .dword_offset = sharp.dword_offset,
            .stride = buffer.GetStride(),
@ -233,6 +276,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
            .used_types = BufferDataType(inst),
            .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
        });
+    }
+
    const auto inst_info = inst.Flags<IR::BufferInstInfo>();
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    // Replace handle with binding index in buffer resource list.
@ -240,7 +285,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
    ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
    if (inst_info.is_typed) {
        ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
-               inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
+               (inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32_32_32));
    }
    if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
        inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
--- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
 struct ExecFlagTag : FlagTag {};
 struct VccFlagTag : FlagTag {};
 struct VccLoTag : FlagTag {};
+struct SccLoTag : FlagTag {};
 struct VccHiTag : FlagTag {};

 struct GotoVariable : FlagTag {
@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
 };

 using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
-                             VccFlagTag, VccLoTag, VccHiTag>;
+                             VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
 using ValueMap = std::unordered_map<IR::Block*, IR::Value>;

 struct DefTable {
@ -83,6 +84,13 @@ struct DefTable {
        exec_flag.insert_or_assign(block, value);
    }

+    // Returns the SccLo definition recorded for `block`. Uses operator[] on the
+    // unordered_map, so a default-constructed entry is inserted when none exists.
+    const IR::Value& Def(IR::Block* block, SccLoTag) {
+        return scc_lo_flag[block];
+    }
+    // Records `value` as the SccLo definition for `block`, replacing any
+    // previously stored entry.
+    void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
+        scc_lo_flag.insert_or_assign(block, value);
+    }
+
    const IR::Value& Def(IR::Block* block, VccLoTag) {
        return vcc_lo_flag[block];
    }
@ -108,6 +116,7 @@ struct DefTable {
    ValueMap scc_flag;
    ValueMap exec_flag;
    ValueMap vcc_flag;
+    ValueMap scc_lo_flag;
    ValueMap vcc_lo_flag;
    ValueMap vcc_hi_flag;
 };
@ -124,6 +133,10 @@ IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
    return IR::Opcode::UndefU32;
 }

+// Undef opcode for the SccLo variable: UndefU32, the same as the VccLo and
+// VccHi overloads.
+IR::Opcode UndefOpcode(const SccLoTag&) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
 IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
    return IR::Opcode::UndefU32;
 }
@ -321,6 +334,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    case IR::Opcode::SetVcc:
        pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
        break;
+    case IR::Opcode::SetSccLo:
+        pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
+        break;
    case IR::Opcode::SetVccLo:
        pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
        break;
@ -350,6 +366,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    case IR::Opcode::GetVcc:
        inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
        break;
+    case IR::Opcode::GetSccLo:
+        inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
+        break;
    case IR::Opcode::GetVccLo:
        inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
        break;
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@ -4,7 +4,6 @@
 #pragma once

 #include <span>
-#include <vector>
 #include <boost/container/static_vector.hpp>
 #include "common/assert.h"
 #include "common/types.h"
@ -69,15 +68,18 @@ enum class VsOutput : u32 {
 };
 using VsOutputMap = std::array<VsOutput, 4>;

+struct Info;
+
 struct BufferResource {
    u32 sgpr_base;
    u32 dword_offset;
    u32 stride;
    u32 num_records;
    IR::Type used_types;
+    AmdGpu::Buffer inline_cbuf;
    bool is_storage;

-    auto operator<=>(const BufferResource&) const = default;
+    constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
 };
 using BufferResourceList = boost::container::static_vector<BufferResource, 16>;

@ -162,6 +164,7 @@ struct Info {
    std::span<const u32> user_data;
    Stage stage;

+    uintptr_t pgm_base{};
    u32 shared_memory_size{};
    bool uses_group_quad{};
    bool uses_shared_u8{};
@ -180,6 +183,10 @@ struct Info {
    }
 };

+// Resolves the descriptor for this resource: the embedded inline_cbuf when it
+// tests true, otherwise the descriptor read from user data at
+// (sgpr_base, dword_offset).
+constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept {
+    if (inline_cbuf) {
+        return inline_cbuf;
+    }
+    return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
+}
+
 } // namespace Shader

 template <>
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@ -85,14 +85,14 @@ struct Liverpool {
        } settings;
        UserData user_data;

-        template <typename T = u8>
-        const T* Address() const {
+        template <typename T = u8*>
+        const T Address() const {
            const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
-            return reinterpret_cast<const T*>(addr);
+            return reinterpret_cast<const T>(addr);
        }

        std::span<const u32> Code() const {
-            const u32* code = Address<u32>();
+            const u32* code = Address<u32*>();
            BinaryInfo bininfo;
            std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
            const u32 num_dwords = bininfo.length / sizeof(u32);
@ -128,10 +128,10 @@ struct Liverpool {
        INSERT_PADDING_WORDS(0x2A);
        UserData user_data;

-        template <typename T = u8>
-        const T* Address() const {
+        template <typename T = u8*>
+        const T Address() const {
            const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
-            return reinterpret_cast<const T*>(addr);
+            return reinterpret_cast<const T>(addr);
        }

        u32 SharedMemSize() const noexcept {
@ -140,7 +140,7 @@ struct Liverpool {
        }

        std::span<const u32> Code() const {
-            const u32* code = Address<u32>();
+            const u32* code = Address<u32*>();
            BinaryInfo bininfo;
            std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
            const u32 num_dwords = bininfo.length / sizeof(u32);
@ -150,7 +150,7 @@ struct Liverpool {

    template <typename Shader>
    static constexpr auto* GetBinaryInfo(const Shader& sh) {
-        const auto* code = sh.template Address<u32>();
+        const auto* code = sh.template Address<u32*>();
        const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
        ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
        return bininfo;
--- a/src/video_core/amdgpu/resource.h
+++ b/src/video_core/amdgpu/resource.h
@ -22,6 +22,7 @@ enum class CompSwizzle : u32 {
 // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
 struct Buffer {
    union {
+        u64 raw0;
        BitField<0, 44, u64> base_address;
        BitField<48, 14, u64> stride;
        BitField<62, 1, u64> cache_swizzle;
@ -29,6 +30,7 @@ struct Buffer {
    };
    u32 num_records;
    union {
+        u32 raw11;
        BitField<0, 3, u32> dst_sel_x;
        BitField<3, 3, u32> dst_sel_y;
        BitField<6, 3, u32> dst_sel_z;
@ -41,6 +43,14 @@ struct Buffer {
        BitField<23, 1, u32> add_tid_enable;
    };

+    // A descriptor tests true when its base_address field is non-zero.
+    operator bool() const noexcept {
+        return base_address != 0;
+    }
+
+    // Byte-wise equality over the whole descriptor object via std::memcmp.
+    // NOTE(review): assumes Buffer has no padding bytes — confirm.
+    bool operator==(const Buffer& other) const noexcept {
+        return std::memcmp(this, &other, sizeof(Buffer)) == 0;
+    }
+
    CompSwizzle GetSwizzle(u32 comp) const noexcept {
        return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
    }
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@ -91,7 +91,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
    u32 binding{};

    for (const auto& buffer : info.buffers) {
-        const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
+        const auto vsharp = buffer.GetVsharp(info);
        const u32 size = vsharp.GetSize();
        const VAddr address = vsharp.base_address.Value();
        texture_cache.OnCpuWrite(address);
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@ -326,7 +326,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&

    for (const auto& stage : stages) {
        for (const auto& buffer : stage.buffers) {
-            const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
+            const auto vsharp = buffer.GetVsharp(stage);
            const VAddr address = vsharp.base_address.Value();
            const u32 size = vsharp.GetSize();
            const u32 offset = staging.Copy(address, size,
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -198,7 +198,7 @@ void PipelineCache::RefreshGraphicsKey() {

    for (u32 i = 0; i < MaxShaderStages; i++) {
        auto* pgm = regs.ProgramForStage(i);
-        if (!pgm || !pgm->Address<u32>()) {
+        if (!pgm || !pgm->Address<u32*>()) {
            key.stage_hashes[i] = 0;
            continue;
        }
@ -248,17 +248,14 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
            DumpShader(code, hash, stage, "bin");
        }

-        if (hash == 0xcafe3773 || hash == 0xc6602df2) {
-            return nullptr;
-        }
-
        block_pool.ReleaseContents();
        inst_pool.ReleaseContents();

        // Recompile shader to IR.
        try {
            LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
-            const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
+            Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
+            info.pgm_base = pgm->Address<uintptr_t>();
            programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

            // Compile IR to SPIR-V
@ -296,8 +293,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
    // Recompile shader to IR.
    try {
        LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
-        const Shader::Info info =
+        Shader::Info info =
            MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
+        info.pgm_base = cs_pgm.Address<uintptr_t>();
        auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

        // Compile IR to SPIR-V