video_core: Compile shader permutations

2024-08-25 14:03:14 +03:00 · 2024-08-25 14:03:14 +03:00 · 007147cc60
parent 790d19e59b
commit 007147cc60
14 changed files with 194 additions and 197 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@ -99,7 +99,7 @@ Id TypeId(const EmitContext& ctx, IR::Type type) {
    }
 }

-void Traverse(EmitContext& ctx, IR::Program& program) {
+void Traverse(EmitContext& ctx, const IR::Program& program) {
    IR::Block* current_block{};
    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
        switch (node.type) {
@ -162,7 +162,7 @@ void Traverse(EmitContext& ctx, IR::Program& program) {
    }
 }

-Id DefineMain(EmitContext& ctx, IR::Program& program) {
+Id DefineMain(EmitContext& ctx, const IR::Program& program) {
    const Id void_function{ctx.TypeFunction(ctx.void_id)};
    const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
    for (IR::Block* const block : program.blocks) {
@ -229,7 +229,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
    ctx.AddEntryPoint(execution_model, main, "main", interfaces);
 }

-void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
+void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
    auto inst{program.blocks.front()->begin()};
    size_t block_index{0};
    ctx.PatchDeferredPhi([&](size_t phi_arg) {
@ -248,8 +248,8 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
 }
 } // Anonymous namespace

-std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) {
-    EmitContext ctx{profile, program, binding};
+std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program, u32& binding) {
+    EmitContext ctx{profile, program.info, binding};
    const Id main{DefineMain(ctx, program)};
    DefineEntryPoint(program, ctx, main);
    if (program.info.stage == Stage::Vertex) {
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@ -9,7 +9,7 @@

 namespace Shader::Backend::SPIRV {

-[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program,
+[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program,
                                         u32& binding);

 } // namespace Shader::Backend::SPIRV
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@ -41,9 +41,9 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar

 } // Anonymous namespace

-EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding_)
-    : Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_},
-      stage{program.info.stage}, binding{binding_} {
+EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_)
+    : Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage},
+      binding{binding_} {
    AddCapability(spv::Capability::Shader);
    DefineArithmeticTypes();
    DefineInterfaces();
@ -524,10 +524,11 @@ void EmitContext::DefineSharedMemory() {
    if (!info.uses_shared) {
        return;
    }
-    if (info.shared_memory_size == 0) {
-        info.shared_memory_size = DefaultSharedMemSize;
+    u32 shared_memory_size = info.shared_memory_size;
+    if (shared_memory_size == 0) {
+        shared_memory_size = DefaultSharedMemSize;
    }
-    const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
+    const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
    const Id type{TypeArray(U32[1], ConstU32(num_elements))};
    shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
    shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@ -36,7 +36,7 @@ struct VectorIds {

 class EmitContext final : public Sirit::Module {
 public:
-    explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
+    explicit EmitContext(const Profile& profile, const Shader::Info& info, u32& binding);
    ~EmitContext();

    Id Def(const IR::Value& value);
@ -124,7 +124,7 @@ public:
        return ConstantComposite(type, constituents);
    }

-    Info& info;
+    const Info& info;
    const Profile& profile;
    Stage stage{};

--- a/src/shader_recompiler/ir/program.h
+++ b/src/shader_recompiler/ir/program.h
@ -12,11 +12,13 @@
 namespace Shader::IR {

 struct Program {
+    // Binds the program to an Info supplied by the caller; only a reference is stored.
+    explicit Program(Info& info_) : info{info_} {}
+
    AbstractSyntaxList syntax_list;
    BlockList blocks;
    BlockList post_order_blocks;
    std::vector<Gcn::GcnInst> ins_list;
-    Info info;
+    // Not owned: refers to the Info object passed to the constructor.
+    Info& info;
 };

 [[nodiscard]] std::string DumpProgram(const Program& program);
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@ -29,7 +29,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {

 IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
                             Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
-                             const Info&& info, const Profile& profile) {
+                             Info& info, const Profile& profile) {
    // Ensure first instruction is expected.
    constexpr u32 token_mov_vcchi = 0xBEEB03FF;
    ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -38,7 +38,7 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
    Gcn::GcnDecodeContext decoder;

    // Decode and save instructions
-    IR::Program program;
+    IR::Program program{info};
    program.ins_list.reserve(token.size());
    while (!slice.atEnd()) {
        program.ins_list.emplace_back(decoder.decodeInstruction(slice));
@ -49,7 +49,6 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
    Gcn::CFG cfg{gcn_block_pool, program.ins_list};

    // Structurize control flow graph and create program.
-    program.info = std::move(info);
    program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile);
    program.blocks = GenerateBlocks(program.syntax_list);
    program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
--- a/src/shader_recompiler/recompiler.h
+++ b/src/shader_recompiler/recompiler.h
@ -13,7 +13,7 @@ struct Profile;

 [[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
                                           Common::ObjectPool<IR::Block>& block_pool,
-                                           std::span<const u32> code, const Info&& info,
+                                           std::span<const u32> code, Info& info,
                                           const Profile& profile);

 } // namespace Shader
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@ -12,6 +12,10 @@
 #include "shader_recompiler/ir/type.h"
 #include "video_core/amdgpu/resource.h"

+// Folds `hash` into `seed` and returns the combined 64-bit value.
+[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
+    const u64 mixed = hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    return seed ^ mixed;
+}
+
 namespace Shader {

 static constexpr size_t NumUserDataRegs = 16;
@ -83,6 +87,18 @@ struct BufferResource {
    bool is_instance_data{};
    bool is_written{};

+    // Builds a key from this buffer's V# (read via GetVsharp): the stride OR'ed with data_format
+    // shifted left by 14 and num_format shifted left by 18.
+    u64 GetKey(const Info& info) const {
+        static constexpr size_t MaxUboSize = 65536;
+        const auto sharp = GetVsharp(info);
+        const u32 stride = sharp.GetStride();
+        u64 key = stride | (sharp.data_format << 14) | (sharp.num_format << 18);
+        if (!is_written) {
+            // Buffers not marked as written get one extra low bit that records whether
+            // stride * num_records exceeds MaxUboSize.
+            // NOTE(review): the product is formed before the comparison with the size_t limit; if
+            // num_records is also 32-bit it could wrap - confirm against AmdGpu::Buffer.
+            key <<= 1;
+            key |= (stride * sharp.num_records) > MaxUboSize;
+        }
+        return key;
+    }
+
    constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
 };
 using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
@ -94,6 +110,13 @@ struct ImageResource {
    AmdGpu::NumberFormat nfmt;
    bool is_storage;
    bool is_depth;
+
+    // Key for this image: the `type` field of the T# obtained through GetTsharp(info).
+    u64 GetKey(const Info& info) const {
+        return GetTsharp(info).type;
+    }
+
+    constexpr AmdGpu::Image GetTsharp(const Info& info) const noexcept;
 };
 using ImageResourceList = boost::container::static_vector<ImageResource, 16>;

@ -214,6 +237,21 @@ struct Info {
        return data;
    }

+    // Sum of the sizes of the buffers, images and samplers lists.
+    size_t NumBindings() const noexcept {
+        size_t count = buffers.size();
+        count += images.size();
+        count += samplers.size();
+        return count;
+    }
+
+    // Starts from HashCombine(pgm_hash, binding), then folds in the key of every buffer followed
+    // by the key of every image, in list order.
+    u64 GetStageSpecializedKey(u32 binding = 0) const noexcept {
+        u64 result = HashCombine(pgm_hash, binding);
+        const auto fold_keys = [&](const auto& resources) {
+            for (const auto& resource : resources) {
+                result = HashCombine(result, resource.GetKey(*this));
+            }
+        };
+        fold_keys(buffers);
+        fold_keys(images);
+        return result;
+    }
+
    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
        u32 vertex_offset = 0;
        u32 instance_offset = 0;
@ -231,6 +269,10 @@ constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexc
    return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
 }

+// Fetches the AmdGpu::Image descriptor via Info::ReadUd using this resource's sgpr_base and
+// dword_offset.
+constexpr AmdGpu::Image ImageResource::GetTsharp(const Info& info) const noexcept {
+    const auto tsharp = info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
+    return tsharp;
+}
+
 constexpr AmdGpu::Sampler SamplerResource::GetSsharp(const Info& info) const noexcept {
    return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
 }
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@ -13,12 +13,11 @@ namespace Vulkan {

 ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
                                 vk::PipelineCache pipeline_cache, u64 compute_key_,
-                                 const Program* program)
-    : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_},
-      info{&program->pgm.info} {
+                                 const Shader::Info& info_, vk::ShaderModule module)
+    : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{&info_} {
    const vk::PipelineShaderStageCreateInfo shader_ci = {
        .stage = vk::ShaderStageFlagBits::eCompute,
-        .module = program->module,
+        .module = module,
        .pName = "main",
    };

@ -141,8 +140,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
    }

    for (const auto& image_desc : info->images) {
-        const auto tsharp =
-            info->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
+        const auto tsharp = image_desc.GetTsharp(*info);
        VideoCore::ImageInfo image_info{tsharp};
        VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
        const auto& image_view = texture_cache.FindTexture(image_info, view_info);
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@ -3,7 +3,7 @@

 #pragma once

-#include "shader_recompiler/ir/program.h"
+#include <boost/container/small_vector.hpp>
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/renderer_vulkan/vk_common.h"

@ -17,18 +17,11 @@ namespace Vulkan {
 class Instance;
 class Scheduler;

-struct Program {
-    Shader::IR::Program pgm;
-    std::vector<u32> spv;
-    vk::ShaderModule module;
-    u32 end_binding;
-};
-
 class ComputePipeline {
 public:
    explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
                             vk::PipelineCache pipeline_cache, u64 compute_key,
-                             const Program* program);
+                             const Shader::Info& info, vk::ShaderModule module);
    ~ComputePipeline();

    [[nodiscard]] vk::Pipeline Handle() const noexcept {
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@ -19,15 +19,11 @@ namespace Vulkan {
 GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
                                   const GraphicsPipelineKey& key_,
                                   vk::PipelineCache pipeline_cache,
-                                   std::span<const Program*, MaxShaderStages> programs)
+                                   std::span<const Shader::Info*, MaxShaderStages> infos,
+                                   std::span<const vk::ShaderModule> modules)
    : instance{instance_}, scheduler{scheduler_}, key{key_} {
    const vk::Device device = instance.GetDevice();
-    for (u32 i = 0; i < MaxShaderStages; i++) {
-        if (!programs[i]) {
-            continue;
-        }
-        stages[i] = &programs[i]->pgm.info;
-    }
+    std::ranges::copy(infos, stages.begin());
    BuildDescSetLayout();

    const vk::PushConstantRange push_constants = {
@ -194,16 +190,18 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
    auto stage = u32(Shader::Stage::Vertex);
    boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
        shader_stages;
+    if (infos[stage]) {
        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
            .stage = vk::ShaderStageFlagBits::eVertex,
-        .module = programs[stage]->module,
+            .module = modules[stage],
            .pName = "main",
        });
+    }
    stage = u32(Shader::Stage::Fragment);
-    if (programs[stage]) {
+    if (infos[stage]) {
        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
            .stage = vk::ShaderStageFlagBits::eFragment,
-            .module = programs[stage]->module,
+            .module = modules[stage],
            .pName = "main",
        });
    }
@ -396,8 +394,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,

        boost::container::static_vector<AmdGpu::Image, 16> tsharps;
        for (const auto& image_desc : stage->images) {
-            const auto tsharp =
-                stage->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
+            const auto tsharp = image_desc.GetTsharp(*stage);
            if (tsharp) {
                tsharps.emplace_back(tsharp);
                VideoCore::ImageInfo image_info{tsharp};
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@ -59,7 +59,8 @@ class GraphicsPipeline {
 public:
    explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
                              const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
-                              std::span<const Program*, MaxShaderStages> programs);
+                              std::span<const Shader::Info*, MaxShaderStages> stages,
+                              std::span<const vk::ShaderModule> modules);
    ~GraphicsPipeline();

    void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache,
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -5,7 +5,6 @@
 #include "common/io_file.h"
 #include "common/path_util.h"
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
-#include "shader_recompiler/exception.h"
 #include "shader_recompiler/recompiler.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -20,10 +19,6 @@ namespace Vulkan {

 using Shader::VsOutput;

-[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
-    return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
-}
-
 void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
    const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
        if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
@ -68,10 +63,12 @@ void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl
                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
 }

-Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
-                            const AmdGpu::Liverpool::Regs& regs) {
+Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, u64 pgm_base,
+                            u64 hash, const AmdGpu::Liverpool::Regs& regs) {
    Shader::Info info{};
    info.user_data = user_data;
+    info.pgm_base = pgm_base;
+    info.pgm_hash = hash;
    info.stage = stage;
    switch (stage) {
    case Shader::Stage::Vertex: {
@ -121,27 +118,38 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
 }

 const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
+    // Returns the graphics pipeline stored under the current graphics_key, building it on first
+    // use. Returns nullptr when the draw is skipped by one of the checks below.
+    const auto& regs = liverpool->regs;
    // Tessellation is unsupported so skip the draw to avoid locking up the driver.
-    if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) {
+    if (regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) {
+        return nullptr;
+    }
+    // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
+    // actual draw hence can skip pipeline creation.
+    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
+        LOG_TRACE(Render_Vulkan, "FCE pass skipped");
+        return nullptr;
+    }
+    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
+        // TODO: check for a valid MRT1 to promote the draw to the resolve pass.
+        LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
        return nullptr;
    }
+    // Refresh the key first; the pipeline constructed below is given the infos and modules
+    // members together with graphics_key.
    RefreshGraphicsKey();
    const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
    if (is_new) {
-        it.value() = CreateGraphicsPipeline();
+        it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key,
+                                                        *pipeline_cache, infos, modules);
    }
    const GraphicsPipeline* pipeline = it->second.get();
    return pipeline;
 }

 const ComputePipeline* PipelineCache::GetComputePipeline() {
-    const auto& cs_pgm = liverpool->regs.cs_program;
-    ASSERT(cs_pgm.Address() != nullptr);
-    const auto* bininfo = Liverpool::GetBinaryInfo(cs_pgm);
-    compute_key = bininfo->shader_hash;
+    RefreshComputeKey();
    const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
    if (is_new) {
-        it.value() = CreateComputePipeline();
+        it.value() = std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
+                                                       compute_key, *infos[0], modules[0]);
    }
    const ComputePipeline* pipeline = it->second.get();
    return pipeline;
@ -229,162 +237,64 @@ void PipelineCache::RefreshGraphicsKey() {
        ++remapped_cb;
    }

+    u32 binding{};
    for (u32 i = 0; i < MaxShaderStages; i++) {
        if (!regs.stage_enable.IsStageEnabled(i)) {
            key.stage_hashes[i] = 0;
+            infos[i] = nullptr;
            continue;
        }
        auto* pgm = regs.ProgramForStage(i);
        if (!pgm || !pgm->Address<u32*>()) {
            key.stage_hashes[i] = 0;
+            infos[i] = nullptr;
            continue;
        }
-        const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
-        if (!bininfo->Valid()) {
-            key.stage_hashes[i] = 0;
-            continue;
-        }
-        key.stage_hashes[i] = bininfo->shader_hash;
-    }
-}

-std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
-    const auto& regs = liverpool->regs;
-
-    // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
-    // actual draw hence can skip pipeline creation.
-    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
-        LOG_TRACE(Render_Vulkan, "FCE pass skipped");
-        return {};
-    }
-
-    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
-        // TODO: check for a valid MRT1 to promote the draw to the resolve pass.
-        LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
-        return {};
-    }
-
-    u32 binding{};
-    for (u32 i = 0; i < MaxShaderStages; i++) {
-        if (!graphics_key.stage_hashes[i]) {
-            programs[i] = nullptr;
-            continue;
-        }
-        auto* pgm = regs.ProgramForStage(i);
-        const auto code = pgm->Code();
-
-        // Dump shader code if requested.
        const auto stage = Shader::Stage{i};
-        const u64 hash = graphics_key.stage_hashes[i];
-        if (Config::dumpShaders()) {
-            DumpShader(code, hash, stage, "bin");
+        std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(pgm, stage, binding);
    }
-
-        if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) {
-            LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage);
-            return {};
-        }
-
-        const u64 lookup_hash = HashCombine(hash, binding);
-        auto it = program_cache.find(lookup_hash);
-        if (it != program_cache.end()) {
-            const Program* program = it.value().get();
-            ASSERT(program->pgm.info.stage == stage);
-            programs[i] = program;
-            binding = program->end_binding;
-            continue;
-        }
-
-        // Recompile shader to IR.
-        try {
-            auto program = std::make_unique<Program>();
-            block_pool.ReleaseContents();
-            inst_pool.ReleaseContents();
-
-            LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
-            Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
-            info.pgm_base = pgm->Address<uintptr_t>();
-            info.pgm_hash = hash;
-            program->pgm =
-                Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
-
-            // Compile IR to SPIR-V
-            program->spv = Shader::Backend::SPIRV::EmitSPIRV(profile, program->pgm, binding);
-            if (Config::dumpShaders()) {
-                DumpShader(program->spv, hash, stage, "spv");
-            }
-
-            // Compile module and set name to hash in renderdoc
-            program->end_binding = binding;
-            program->module = CompileSPV(program->spv, instance.GetDevice());
-            const auto name = fmt::format("{}_{:#x}", stage, hash);
-            Vulkan::SetObjectName(instance.GetDevice(), program->module, name);
-
-            // Cache program
-            const auto [it, _] = program_cache.emplace(lookup_hash, std::move(program));
-            programs[i] = it.value().get();
-        } catch (const Shader::Exception& e) {
-            UNREACHABLE_MSG("{}", e.what());
-        }
-    }
-
-    return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
-                                              programs);
 }

-std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
-    const auto& cs_pgm = liverpool->regs.cs_program;
-    const auto code = cs_pgm.Code();
+// Resolves the compute program through GetProgram (starting at binding 0) and stores the returned
+// info, module and key in infos[0], modules[0] and compute_key.
+void PipelineCache::RefreshComputeKey() {
+    u32 first_binding = 0;
+    const auto [cs_info, cs_module, cs_key] =
+        GetProgram(&liverpool->regs.cs_program, Shader::Stage::Compute, first_binding);
+    infos[0] = cs_info;
+    modules[0] = cs_module;
+    compute_key = cs_key;
+}

-    // Dump shader code if requested.
-    if (Config::dumpShaders()) {
-        DumpShader(code, compute_key, Shader::Stage::Compute, "bin");
-    }
+vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span<const u32> code,
+                                              size_t perm_idx, u32& binding) {
+    // Translates `code` to IR using `info`, emits SPIR-V for it and returns the compiled shader
+    // module. `perm_idx` is only used for the log message, the dump file name and the object name.
+    // `binding` is handed to EmitSPIRV by non-const reference.
+    LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
+             perm_idx != 0 ? "(permutation)" : "");

    block_pool.ReleaseContents();
    inst_pool.ReleaseContents();
-
-    // Recompile shader to IR.
-    try {
-        auto program = std::make_unique<Program>();
-        LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
-        Shader::Info info =
-            MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
-        info.pgm_base = cs_pgm.Address<uintptr_t>();
-        info.pgm_hash = compute_key;
-        program->pgm =
-            Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
+    const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);

    // Compile IR to SPIR-V
-        u32 binding{};
-        program->spv = Shader::Backend::SPIRV::EmitSPIRV(profile, program->pgm, binding);
+    // The key is computed from `binding` before EmitSPIRV receives it.
+    const u64 key = info.GetStageSpecializedKey(binding);
+    const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
    if (Config::dumpShaders()) {
-            DumpShader(program->spv, compute_key, Shader::Stage::Compute, "spv");
+        DumpShader(spv, key, info.stage, perm_idx, "spv");
    }

-        // Compile module and set name to hash in renderdoc
-        program->module = CompileSPV(program->spv, instance.GetDevice());
-        const auto name = fmt::format("cs_{:#x}", compute_key);
-        Vulkan::SetObjectName(instance.GetDevice(), program->module, name);
-
-        // Cache program
-        const auto [it, _] = program_cache.emplace(compute_key, std::move(program));
-        return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, compute_key,
-                                                 it.value().get());
-    } catch (const Shader::Exception& e) {
-        UNREACHABLE_MSG("{}", e.what());
-        return nullptr;
-    }
+    // Create module and set name to hash in renderdoc
+    const auto module = CompileSPV(spv, instance.GetDevice());
+    ASSERT(module != VK_NULL_HANDLE);
+    const auto name = fmt::format("{}_{:#x}_{}", info.stage, key, perm_idx);
+    Vulkan::SetObjectName(instance.GetDevice(), module, name);
+    return module;
 }

 void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
-                               std::string_view ext) {
+                               size_t perm_idx, std::string_view ext) {
+    // Writes `code` to "<ShaderDir>/dumps/<stage>_<hash>_<perm_idx>.<ext>", creating the dumps
+    // directory when it does not exist yet.
    using namespace Common::FS;
    const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
    if (!std::filesystem::exists(dump_dir)) {
        std::filesystem::create_directories(dump_dir);
    }
-    const auto filename = fmt::format("{}_{:#018x}.{}", stage, hash, ext);
+    const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext);
    const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
    file.WriteSpan(code);
 }
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@ -19,6 +19,15 @@ namespace Vulkan {
 class Instance;
 class Scheduler;

+// A shader program entry: its Info plus the list of modules stored for it.
+struct Program {
+    // Pairs a u64 key with the shader module stored under that key.
+    using Module = std::pair<u64, vk::ShaderModule>;
+    Shader::Info info;
+    boost::container::small_vector<Module, 8> modules;
+};
+
+Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, u64 pgm_base,
+                            u64 hash, const AmdGpu::Liverpool::Regs& regs);
+
 class PipelineCache {
    static constexpr size_t MaxShaderStages = 5;

@ -33,10 +42,53 @@ public:

 private:
    void RefreshGraphicsKey();
-    void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, std::string_view ext);
+    void RefreshComputeKey();
+    void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
+                    std::string_view ext);

-    std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
-    std::unique_ptr<ComputePipeline> CreateComputePipeline();
+    vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
+                                   u32& binding);
+
+    // Looks up (or creates) the cached Program for the binary hash of `pgm` and returns the module
+    // whose key matches the current stage-specialized key, compiling a new permutation when none
+    // matches.
+    //
+    // pgm:     program registers; must provide user_data, Code() and Address<T>().
+    // stage:   shader stage forwarded to MakeShaderInfo.
+    // binding: first binding slot for this stage; on return it has been advanced either by
+    //          CompileModule (new permutation) or by info.NumBindings() (existing module).
+    //
+    // Returns {info of the cached program, selected module, HashCombine(hash, stage_key)}.
+    std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const auto* pgm,
+                                                                      Shader::Stage stage,
+                                                                      u32& binding) {
+        // Fetch program for binaryinfo hash.
+        const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
+        const u64 hash = bininfo->shader_hash;
+        auto [it_pgm, new_program] = program_cache.try_emplace(hash);
+        u64 stage_key{};
+        if (new_program) {
+            // Create a new program and a module with current runtime state.
+            const VAddr pgm_base = pgm->template Address<VAddr>();
+            auto program = program_pool.Create();
+            program->info = MakeShaderInfo(stage, pgm->user_data, pgm_base, hash, liverpool->regs);
+            // Compile against a copy so `binding` itself is advanced exactly once, further below.
+            u32 start_binding = binding;
+            const auto module = CompileModule(program->info, pgm->Code(), 0, start_binding);
+            stage_key = program->info.GetStageSpecializedKey(binding);
+            program->modules.emplace_back(stage_key, module);
+            it_pgm.value() = program;
+        } else {
+            stage_key = it_pgm->second->info.GetStageSpecializedKey(binding);
+        }
+
+        Program* program = it_pgm->second;
+        const auto& info = program->info;
+
+        // Select the module for the current runtime state, compiling a specialized one if missing.
+        // The result is kept in a local: when no module matches, `it` is the end iterator (and
+        // emplace_back may invalidate it), so it must not be dereferenced afterwards.
+        vk::ShaderModule selected_module{};
+        const auto it = std::ranges::find(program->modules, stage_key, &Program::Module::first);
+        if (it == program->modules.end()) {
+            auto new_info = MakeShaderInfo(stage, pgm->user_data, info.pgm_base, info.pgm_hash,
+                                           liverpool->regs);
+            const size_t perm_idx = program->modules.size();
+            selected_module = CompileModule(new_info, pgm->Code(), perm_idx, binding);
+            program->modules.emplace_back(stage_key, selected_module);
+        } else {
+            selected_module = it->second;
+            binding += info.NumBindings();
+        }
+
+        const u64 full_hash = HashCombine(hash, stage_key);
+        return std::make_tuple(&info, selected_module, full_hash);
+    }

 private:
    const Instance& instance;
@ -44,15 +96,17 @@ private:
    AmdGpu::Liverpool* liverpool;
    vk::UniquePipelineCache pipeline_cache;
    vk::UniquePipelineLayout pipeline_layout;
-    tsl::robin_map<size_t, std::unique_ptr<Program>> program_cache;
+    tsl::robin_map<size_t, Program*> program_cache;
    tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
    tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
-    std::array<const Program*, MaxShaderStages> programs{};
+    std::array<const Shader::Info*, MaxShaderStages> infos{};
+    std::array<vk::ShaderModule, MaxShaderStages> modules{};
    Shader::Profile profile{};
    GraphicsPipelineKey graphics_key{};
    u64 compute_key{};
    Common::ObjectPool<Shader::IR::Inst> inst_pool;
    Common::ObjectPool<Shader::IR::Block> block_pool;
+    Common::ObjectPool<Program> program_pool;
 };

 } // namespace Vulkan