shader_recompiler: handle fetch shader address offsets (#538)
* shader_recompiler: handle fetch shader address offsets: parse the index and offset SGPRs from the fetch shader and propagate them to vkBindVertexBuffers
* shader_recompiler: fix fetch shader parsing when the offset is not present
* video_core: propagate the index/offset SGPRs to vkCmdDraw instead of offsetting the buffer address
* video_core: add vertex_offset to non-indexed draw calls; rename the fetch offset fields
parent 208575d392
commit 9e4fc17e6c
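Background: on GCN, every vertex shader is paired with a driver-generated fetch shader that loads the vertex attributes. When a draw starts at a non-zero base vertex or base instance, the fetch shader first offsets the hardware-provided indices by adding user-data SGPRs to them; this commit teaches the recompiler to recognize that pattern and replay the offsets on the Vulkan side. A rough sketch of the recovered effect (register numbers here are illustrative, not from the commit):

```cpp
#include <cstdint>
#include <cstdio>

// A fetch shader prologue with offsets looks roughly like:
//     v_add_i32 v0, s4, v0   ; v0 = vertex index + user_data[4]
//     v_add_i32 v3, s5, v3   ; v3 = instance id  + user_data[5]
// The recompiler records the SGPR numbers (4 and 5 are made up here) so the
// Vulkan backend can replay the offsets as firstVertex/firstInstance instead.
int main() {
    std::uint32_t v0 = 7;                                    // vertex index from hardware
    const std::uint32_t user_data[16] = {0, 0, 0, 0, 100};   // s4 holds base vertex 100
    v0 += user_data[4];                                      // effect of v_add_i32 v0, s4, v0
    std::printf("fetching attributes for vertex %u\n", v0);  // 107
}
```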
```diff
@@ -3,6 +3,7 @@
 #include <algorithm>
 #include <boost/container/static_vector.hpp>
+#include "common/assert.h"
 #include "shader_recompiler/frontend/decode.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
```
```diff
@@ -33,8 +34,8 @@ namespace Shader::Gcn {
  * We take the reverse way, extract the original input semantics from these instructions.
  **/
 
-std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
-    std::vector<VertexAttribute> attributes;
+FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
+    FetchShaderData data{};
     GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
     GcnDecodeContext decoder;
```
```diff
@@ -59,6 +60,21 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
             continue;
         }
 
+        if (inst.opcode == Opcode::V_ADD_I32) {
+            const auto vgpr = inst.dst[0].code;
+            const auto sgpr = s8(inst.src[0].code);
+            switch (vgpr) {
+            case 0: // V0 is always the vertex offset
+                data.vertex_offset_sgpr = sgpr;
+                break;
+            case 3: // V3 is always the instance offset
+                data.instance_offset_sgpr = sgpr;
+                break;
+            default:
+                UNREACHABLE();
+            }
+        }
+
         if (inst.inst_class == InstClass::VectorMemBufFmt) {
             // SRSRC is in units of 4 SGPRs while SBASE is in pairs of SGPRs
             const u32 base_sgpr = inst.src[2].code * 4;
```
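The hard-coded V0/V3 cases follow the VGPR layout GCN preloads for vertex shaders, so an offsetting add can only ever target those two registers; anything else is a fetch-shader shape the parser does not understand, hence UNREACHABLE(). A reduced stand-in of the same detection (hypothetical MiniInst, not the repo's GcnInst):

```cpp
#include <cassert>
#include <cstdint>

// VGPR layout preloaded for a GCN vertex shader:
//   v0 = vertex index            v2 = instance ID (step rate 1)
//   v1 = instance ID (step 0)    v3 = instance ID (plain)
struct MiniInst {
    std::uint8_t dst_vgpr; // destination of the V_ADD_I32
    std::uint8_t src_sgpr; // user-data SGPR holding the offset value
};

void RecordOffset(const MiniInst& inst, std::int8_t& vertex_sgpr, std::int8_t& instance_sgpr) {
    switch (inst.dst_vgpr) {
    case 0: // vertex index lives in V0
        vertex_sgpr = static_cast<std::int8_t>(inst.src_sgpr);
        break;
    case 3: // plain instance ID lives in V3
        instance_sgpr = static_cast<std::int8_t>(inst.src_sgpr);
        break;
    default: // any other destination is a pattern we don't expect
        assert(false);
    }
}
```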
```diff
@@ -68,7 +84,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
         const auto it = std::ranges::find_if(
             loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
 
-        auto& attrib = attributes.emplace_back();
+        auto& attrib = data.attributes.emplace_back();
         attrib.semantic = semantic_index++;
         attrib.dest_vgpr = inst.src[1].code;
         attrib.num_elements = inst.control.mubuf.count;
```
```diff
@@ -83,7 +99,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
         }
     }
 
-    return attributes;
+    return data;
 }
 
 } // namespace Shader::Gcn
```
```diff
@@ -17,6 +17,12 @@ struct VertexAttribute {
     u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
 };
 
-std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
+struct FetchShaderData {
+    std::vector<VertexAttribute> attributes;
+    s8 vertex_offset_sgpr = -1;   ///< SGPR of vertex offset from VADDR
+    s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
+};
+
+FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
 
 } // namespace Shader::Gcn
```
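Using s8 leaves -1 free as a "no offset present" sentinel, since user-data SGPR indices are small non-negative numbers. A hypothetical call site for the new API (the real consumer is Translator::EmitFetch, below):

```cpp
#include "shader_recompiler/frontend/fetch_shader.h"

using namespace Shader::Gcn;

// Sketch only: assumes fetch_code points at a complete fetch shader binary.
void SetupVertexInputs(const u32* fetch_code) {
    u32 fetch_size{};
    const FetchShaderData data = ParseFetchShader(fetch_code, &fetch_size);
    for (const VertexAttribute& attrib : data.attributes) {
        // bind one vertex input per recovered attribute ...
    }
    if (data.vertex_offset_sgpr != -1) {
        // draws must be shifted by the value the guest wrote into that SGPR
    }
}
```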
```diff
@@ -346,7 +346,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
 
     // Parse the assembly to generate a list of attributes.
     u32 fetch_size{};
-    const auto attribs = ParseFetchShader(code, &fetch_size);
+    const auto fetch_data = ParseFetchShader(code, &fetch_size);
 
     if (Config::dumpShaders()) {
         using namespace Common::FS;
```
```diff
@@ -359,7 +359,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
         file.WriteRaw<u8>(code, fetch_size);
     }
 
-    for (const auto& attrib : attribs) {
+    info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
+    info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
+
+    for (const auto& attrib : fetch_data.attributes) {
         const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
         IR::VectorReg dst_reg{attrib.dest_vgpr};
```
```diff
@@ -175,6 +175,9 @@ struct Info {
     AttributeFlags stores{};
     boost::container::static_vector<VsOutputMap, 3> vs_outputs;
 
+    s8 vertex_offset_sgpr = -1;
+    s8 instance_offset_sgpr = -1;
+
     BufferResourceList buffers;
     ImageResourceList images;
     SamplerResourceList samplers;
```
```diff
@@ -210,6 +213,18 @@ struct Info {
         std::memcpy(&data, base + dword_offset, sizeof(T));
         return data;
     }
+
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
+        u32 vertex_offset = 0;
+        u32 instance_offset = 0;
+        if (vertex_offset_sgpr != -1) {
+            vertex_offset = user_data[vertex_offset_sgpr];
+        }
+        if (instance_offset_sgpr != -1) {
+            instance_offset = user_data[instance_offset_sgpr];
+        }
+        return {vertex_offset, instance_offset};
+    }
 };
 
 constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept {
```
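A quick standalone sanity check of the sentinel behaviour (not repo code): when no V_ADD_I32 was found, the SGPR index stays -1 and the offset resolves to zero, so draws without a base offset behave exactly as before.

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

int main() {
    std::array<std::uint32_t, 16> user_data{}; // user-data SGPRs written by the guest
    std::int8_t vertex_offset_sgpr = -1;       // sentinel: fetch shader had no offset

    // Same guard as GetDrawOffsets(): unoffset draws resolve to 0.
    std::uint32_t vertex_offset = vertex_offset_sgpr != -1 ? user_data[vertex_offset_sgpr] : 0;
    std::printf("vertex_offset = %u\n", vertex_offset); // 0

    user_data[4] = 32;      // guest wrote a base vertex of 32 into SGPR4
    vertex_offset_sgpr = 4; // parser saw: v_add_i32 v0, s4, v0
    vertex_offset = vertex_offset_sgpr != -1 ? user_data[vertex_offset_sgpr] : 0;
    std::printf("vertex_offset = %u\n", vertex_offset); // 32
}
```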
```diff
@@ -52,13 +52,17 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     BeginRendering();
     UpdateDynamicState(*pipeline);
 
+    const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets();
+
     if (is_indexed) {
-        cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
+        cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
+                           instance_offset);
     } else {
         const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList
                                      ? 4
                                      : regs.num_indices;
-        cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
+        cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
+                    instance_offset);
     }
 }
```
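Why the s32 cast appears only on the indexed path: Vulkan's indexed draw takes a signed vertexOffset that is added to every fetched index, while the non-indexed draw takes an unsigned firstVertex; both carry firstInstance. A minimal sketch with the same vulkan-hpp calls (hypothetical EmitDraw wrapper, parameters assumed to come from the surrounding rasterizer state):

```cpp
#include <cstdint>
#include <vulkan/vulkan.hpp>

// Maps the recovered fetch-shader offsets onto Vulkan draw parameters.
void EmitDraw(vk::CommandBuffer cmdbuf, bool is_indexed, uint32_t num_indices,
              uint32_t num_vertices, uint32_t num_instances,
              uint32_t vertex_offset, uint32_t instance_offset) {
    if (is_indexed) {
        // vertexOffset is int32_t here, hence the s32 cast in the diff above.
        cmdbuf.drawIndexed(num_indices, num_instances, /*firstIndex=*/0,
                           static_cast<int32_t>(vertex_offset), instance_offset);
    } else {
        // Non-indexed draws take an unsigned firstVertex instead.
        cmdbuf.draw(num_vertices, num_instances, vertex_offset, instance_offset);
    }
}
```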