From 9e4fc17e6c2e18b4e4c7707115378b31a1ee4210 Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Sat, 24 Aug 2024 12:36:40 -0300 Subject: [PATCH] shader_recompiler: handle fetch shader address offsets (#538) * shader_recompiler: handle fetch shader address offsets parse index & offset sgpr from fetch shader and propagate them to vkBindVertexBuffers * shader_recompiler: fix fetch_shader when offset is not present * video_core: propagate index/offset SGPRs to vkCmdDraw instead of offsetting the buffer address * video_core: add vertex_offset to non-indexed draw calls renamed fetch offset fields --- .../frontend/fetch_shader.cpp | 24 +++++++++++++++---- src/shader_recompiler/frontend/fetch_shader.h | 8 ++++++- .../frontend/translate/translate.cpp | 7 ++++-- src/shader_recompiler/runtime_info.h | 15 ++++++++++++ .../renderer_vulkan/vk_rasterizer.cpp | 8 +++++-- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 81e4093f..16938410 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -3,6 +3,7 @@ #include #include +#include "common/assert.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/fetch_shader.h" @@ -33,8 +34,8 @@ namespace Shader::Gcn { * We take the reverse way, extract the original input semantics from these instructions. **/ -std::vector ParseFetchShader(const u32* code, u32* out_size) { - std::vector attributes; +FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { + FetchShaderData data{}; GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; @@ -59,6 +60,21 @@ std::vector ParseFetchShader(const u32* code, u32* out_size) { continue; } + if (inst.opcode == Opcode::V_ADD_I32) { + const auto vgpr = inst.dst[0].code; + const auto sgpr = s8(inst.src[0].code); + switch (vgpr) { + case 0: // V0 is always the vertex offset + data.vertex_offset_sgpr = sgpr; + break; + case 3: // V3 is always the instance offset + data.instance_offset_sgpr = sgpr; + break; + default: + UNREACHABLE(); + } + } + if (inst.inst_class == InstClass::VectorMemBufFmt) { // SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs const u32 base_sgpr = inst.src[2].code * 4; @@ -68,7 +84,7 @@ std::vector ParseFetchShader(const u32* code, u32* out_size) { const auto it = std::ranges::find_if( loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; }); - auto& attrib = attributes.emplace_back(); + auto& attrib = data.attributes.emplace_back(); attrib.semantic = semantic_index++; attrib.dest_vgpr = inst.src[1].code; attrib.num_elements = inst.control.mubuf.count; @@ -83,7 +99,7 @@ std::vector ParseFetchShader(const u32* code, u32* out_size) { } } - return attributes; + return data; } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 0858061a..0e5d1541 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -17,6 +17,12 @@ struct VertexAttribute { u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate }; -std::vector ParseFetchShader(const u32* code, u32* out_size); +struct FetchShaderData { + std::vector attributes; + s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR + s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR +}; + +FetchShaderData ParseFetchShader(const u32* code, u32* out_size); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 4070560a..e59cd565 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -346,7 +346,7 @@ void Translator::EmitFetch(const GcnInst& inst) { // Parse the assembly to generate a list of attributes. u32 fetch_size{}; - const auto attribs = ParseFetchShader(code, &fetch_size); + const auto fetch_data = ParseFetchShader(code, &fetch_size); if (Config::dumpShaders()) { using namespace Common::FS; @@ -359,7 +359,10 @@ void Translator::EmitFetch(const GcnInst& inst) { file.WriteRaw(code, fetch_size); } - for (const auto& attrib : attribs) { + info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr; + info.instance_offset_sgpr = fetch_data.instance_offset_sgpr; + + for (const auto& attrib : fetch_data.attributes) { const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; IR::VectorReg dst_reg{attrib.dest_vgpr}; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index b1eb6aea..7d36dbe1 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -175,6 +175,9 @@ struct Info { AttributeFlags stores{}; boost::container::static_vector vs_outputs; + s8 vertex_offset_sgpr = -1; + s8 instance_offset_sgpr = -1; + BufferResourceList buffers; ImageResourceList images; SamplerResourceList samplers; @@ -210,6 +213,18 @@ struct Info { std::memcpy(&data, base + dword_offset, sizeof(T)); return data; } + + [[nodiscard]] std::pair GetDrawOffsets() const noexcept { + u32 vertex_offset = 0; + u32 instance_offset = 0; + if (vertex_offset_sgpr != -1) { + vertex_offset = user_data[vertex_offset_sgpr]; + } + if (instance_offset_sgpr != -1) { + instance_offset = user_data[instance_offset_sgpr]; + } + return {vertex_offset, instance_offset}; + } }; constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6cd80393..3b02a98a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -52,13 +52,17 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { BeginRendering(); UpdateDynamicState(*pipeline); + const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(); + if (is_indexed) { - cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); + cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset), + instance_offset); } else { const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList ? 4 : regs.num_indices; - cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset, + instance_offset); } }