diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 8f54e9b6..7f2a0989 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -3,6 +3,7 @@ #include #include +#include "common/assert.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/fetch_shader.h" @@ -33,7 +34,7 @@ namespace Shader::Gcn { * We take the reverse way, extract the original input semantics from these instructions. **/ -std::vector ParseFetchShader(const u32* code, u32* out_size) { +FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { std::vector attributes; GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; @@ -44,8 +45,9 @@ std::vector ParseFetchShader(const u32* code, u32* out_size) { s32 dst_reg{-1}; }; boost::container::static_vector loads; - std::array offsets{}; - offsets.fill(0xFF); + + s8 fetch_index_sgpr = -1; + s8 fetch_offset_sgpr = -1; u32 semantic_index = 0; while (!code_slice.atEnd()) { @@ -62,7 +64,18 @@ std::vector ParseFetchShader(const u32* code, u32* out_size) { } if (inst.opcode == Opcode::V_ADD_I32) { - offsets[inst.dst[0].code] = inst.src[0].code; + const auto vgpr = inst.dst[0].code; + const auto sgpr = s8(inst.src[0].code); + switch (vgpr) { + case 0: // V0 is always the index + fetch_index_sgpr = sgpr; + break; + case 3: // V3 is always the offset + fetch_offset_sgpr = sgpr; + break; + default: + UNREACHABLE(); + } } if (inst.inst_class == InstClass::VectorMemBufFmt) { @@ -74,29 +87,13 @@ std::vector ParseFetchShader(const u32* code, u32* out_size) { const auto it = std::ranges::find_if( loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; }); - auto mubuf = inst.control.mubuf; - auto& attrib = attributes.emplace_back(); attrib.semantic = semantic_index++; attrib.dest_vgpr = inst.src[1].code; - attrib.num_elements = mubuf.count; + attrib.num_elements = inst.control.mubuf.count; attrib.sgpr_base = it->base_sgpr; attrib.dword_offset = it->dword_offset; - u8 soofs = inst.src[0].code; - - if (mubuf.idxen != 0) { - attrib.index_sgpr = offsets[soofs++]; - } else { - attrib.index_sgpr = 0xFF; - } - - if (mubuf.offen != 0) { - attrib.offset_sgpr = offsets[soofs]; - } else { - attrib.offset_sgpr = 0xFF; - } - // Store instance id rate attrib.instance_data = inst.src[0].code; @@ -105,7 +102,11 @@ std::vector ParseFetchShader(const u32* code, u32* out_size) { } } - return attributes; + return FetchShaderData{ + .attributes = std::move(attributes), + .fetch_index_sgpr = fetch_index_sgpr, + .fetch_offset_sgpr = fetch_offset_sgpr, + }; } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 4ae0d237..4ea4853c 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -15,10 +15,14 @@ struct VertexAttribute { u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# u8 dword_offset; ///< The dword offset of the V# that describes this attribute. u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate - u8 index_sgpr; ///< Read index from VADDR - u8 offset_sgpr; ///< Offset from VADDR }; -std::vector ParseFetchShader(const u32* code, u32* out_size); +struct FetchShaderData { + std::vector attributes; + s8 fetch_index_sgpr; ///< Read index from VADDR + s8 fetch_offset_sgpr; ///< Read offset from VADDR +}; + +FetchShaderData ParseFetchShader(const u32* code, u32* out_size); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 2a3b3503..14e2e83b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -346,7 +346,7 @@ void Translator::EmitFetch(const GcnInst& inst) { // Parse the assembly to generate a list of attributes. u32 fetch_size{}; - const auto attribs = ParseFetchShader(code, &fetch_size); + const auto fetch_data = ParseFetchShader(code, &fetch_size); if (Config::dumpShaders()) { using namespace Common::FS; @@ -359,7 +359,10 @@ void Translator::EmitFetch(const GcnInst& inst) { file.WriteRaw(code, fetch_size); } - for (const auto& attrib : attribs) { + info.fetch_index_sgpr = fetch_data.fetch_index_sgpr; + info.fetch_offset_sgpr = fetch_data.fetch_offset_sgpr; + + for (const auto& attrib : fetch_data.attributes) { const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; IR::VectorReg dst_reg{attrib.dest_vgpr}; @@ -412,8 +415,6 @@ void Translator::EmitFetch(const GcnInst& inst) { .num_components = std::min(attrib.num_elements, num_components), .sgpr_base = attrib.sgpr_base, .dword_offset = attrib.dword_offset, - .index_sgpr = attrib.index_sgpr, - .offset_sgpr = attrib.offset_sgpr, .instance_step_rate = step_rate, .instance_data_buf = instance_buf_handle, }); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 48220ce2..099be527 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -135,8 +135,6 @@ struct Info { u16 num_components; u8 sgpr_base; u8 dword_offset; - u8 index_sgpr; - u8 offset_sgpr; InstanceIdType instance_step_rate; s32 instance_data_buf; }; @@ -177,6 +175,9 @@ struct Info { AttributeFlags stores{}; boost::container::static_vector vs_outputs; + s8 fetch_index_sgpr = -1; + s8 fetch_offset_sgpr = -1; + BufferResourceList buffers; ImageResourceList images; SamplerResourceList samplers; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index ea762d1f..02d6b2ce 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -101,7 +101,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { } std::array host_buffers; - std::array host_offsets{}; + std::array host_offsets; boost::container::static_vector guest_buffers; struct BufferRange { @@ -131,16 +131,6 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { } guest_buffers.emplace_back(buffer); ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); - - u32 offset = 0; - if (input.index_sgpr != 0xFF) { - offset += vs_info.user_data[input.index_sgpr] * buffer.GetStride(); - } - if (input.offset_sgpr != 0xFF) { - offset += vs_info.user_data[input.offset_sgpr]; - } - host_offsets[guest_buffers.size() - 1] = offset; - attributes.push_back({ .location = input.binding, .binding = input.binding, @@ -190,7 +180,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { ASSERT(host_buffer != ranges_merged.cend()); host_buffers[i] = host_buffer->vk_buffer; - host_offsets[i] += host_buffer->offset + buffer.base_address - host_buffer->base_address; + host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address; } if (num_buffers > 0) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 542624a0..d80e268d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -51,13 +51,24 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { BeginRendering(); UpdateDynamicState(*pipeline); + u32 instance_offset = 0; + if (vs_info.fetch_offset_sgpr != -1) { + instance_offset = vs_info.user_data[vs_info.fetch_offset_sgpr]; + } + if (is_indexed) { - cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); + u32 vertex_offset = 0; + if (vs_info.fetch_index_sgpr != -1) { + vertex_offset = vs_info.user_data[vs_info.fetch_index_sgpr]; + } + + cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset), + instance_offset); } else { const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList ? 4 : regs.num_indices; - cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, instance_offset); } }