From cfbe8b9e6d413cea9138386428866fd282d9a041 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 6 Jul 2024 17:01:43 +0200 Subject: [PATCH] renderer: added support for instance step rates --- .../spirv/emit_spirv_context_get_set.cpp | 34 ++++++++++---- .../backend/spirv/spirv_emit_context.cpp | 45 ++++++++++++++++--- .../backend/spirv/spirv_emit_context.h | 3 ++ .../frontend/translate/translate.cpp | 21 +++++++-- src/shader_recompiler/runtime_info.h | 4 +- src/video_core/amdgpu/liverpool.h | 7 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 ++++++++- .../renderer_vulkan/vk_graphics_pipeline.h | 12 +++-- .../renderer_vulkan/vk_rasterizer.cpp | 7 +++ 9 files changed, 129 insertions(+), 26 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 8d8a1488..157023b6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -135,15 +135,33 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(index)}; - if (!ValidId(param.id)) { - // Attribute is disabled or varying component is not written - return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); - } - if (param.num_components > 1) { - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; - return ctx.OpLoad(param.component_type, pointer); + if (param.buffer_handle < 0) { + if (!ValidId(param.id)) { + // Attribute is disabled or varying component is not written + return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); + } + + if (param.num_components > 1) { + const Id pointer{ + ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; + return ctx.OpLoad(param.component_type, pointer); + } else { + return ctx.OpLoad(param.component_type, param.id); + } } else { - return ctx.OpLoad(param.component_type, param.id); + const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value; + const auto step_rate = ctx.OpLoad( + ctx.U32[1], + ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), + ctx.instance_step_rates, rate_idx)); + const auto offset = ctx.OpIAdd( + ctx.U32[1], + ctx.OpIMul( + ctx.U32[1], + ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate), + ctx.ConstU32(param.num_components)), + ctx.ConstU32(comp)); + return EmitReadConstBuffer(ctx, param.buffer_handle, offset); } } switch (attr) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 6d9b2547..617458dd 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { void EmitContext::DefineInputs(const Info& info) { switch (stage) { - case Stage::Vertex: + case Stage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); + instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); + + // Create push constants block for instance steps rates + const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")}; + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "sr0"); + MemberName(struct_type, 1, "sr1"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); + instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant); + Name(instance_step_rates, "step_rates"); + interfaces.push_back(instance_step_rates); + for (const auto& input : info.vs_inputs) { const Id type{GetAttributeType(*this, input.fmt)}; - const Id id{DefineInput(type, input.binding)}; - Name(id, fmt::format("vs_in_attr{}", input.binding)); - input_params[input.binding] = GetAttributeInfo(input.fmt, id); - interfaces.push_back(id); + if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || + input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) { + + const u32 rate_idx = + input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0 + : 1; + // Note that we pass index rather than Id + input_params[input.binding] = { + rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf, + }; + } else { + Id id{DefineInput(type, input.binding)}; + if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) { + Name(id, fmt::format("vs_instance_attr{}", input.binding)); + } else { + Name(id, fmt::format("vs_in_attr{}", input.binding)); + } + input_params[input.binding] = GetAttributeInfo(input.fmt, id); + interfaces.push_back(id); + } } break; + } case Stage::Fragment: if (info.uses_group_quad) { subgroup_local_invocation_id = DefineVariable( @@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) { if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { Decorate(record_array_type, spv::Decoration::ArrayStride, 4); const auto name = - fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); + buffer.is_instance_data + ? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f', + sizeof(float) * CHAR_BIT) + : fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); Name(struct_type, name); Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "data"); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index d143be4b..b51edd63 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -165,6 +165,8 @@ public: Id output_position{}; Id vertex_index{}; + Id instance_id{}; + Id instance_step_rates{}; Id base_vertex{}; Id frag_coord{}; Id front_facing{}; @@ -214,6 +216,7 @@ public: Id pointer_type; Id component_type; u32 num_components; + s32 buffer_handle{-1}; }; std::array input_params{}; std::array output_params{}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index cb6d16c3..ec5790ac 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -235,9 +235,21 @@ void Translator::EmitFetch(const GcnInst& inst) { ir.SetVectorReg(dst_reg++, comp); } - if (attrib.instance_data == 2 || attrib.instance_data == 3) { - LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}", - attrib.instance_data); + // In case of programmable step rates we need to fallback to instance data pulling in + // shader, so VBs should be bound as regular data buffers + s32 instance_buf_handle = -1; + const auto step_rate = static_cast(attrib.instance_data); + if (step_rate == Info::VsInput::OverStepRate0 || + step_rate == Info::VsInput::OverStepRate1) { + info.buffers.push_back({ + .sgpr_base = attrib.sgpr_base, + .dword_offset = attrib.dword_offset, + .stride = buffer.GetStride(), + .num_records = buffer.num_records, + .used_types = IR::Type::F32, + .is_instance_data = true, + }); + instance_buf_handle = s32(info.buffers.size() - 1); } const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); @@ -247,7 +259,8 @@ void Translator::EmitFetch(const GcnInst& inst) { .num_components = std::min(attrib.num_elements, num_components), .sgpr_base = attrib.sgpr_base, .dword_offset = attrib.dword_offset, - .instance_step_rate = static_cast(attrib.instance_data), + .instance_step_rate = step_rate, + .instance_data_buf = instance_buf_handle, }); } } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 66d32d4d..6cf5c632 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -77,7 +77,8 @@ struct BufferResource { u32 num_records; IR::Type used_types; AmdGpu::Buffer inline_cbuf; - bool is_storage; + bool is_storage{false}; + bool is_instance_data{false}; constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept; }; @@ -116,6 +117,7 @@ struct Info { u8 sgpr_base; u8 dword_offset; InstanceIdType instance_step_rate; + s32 instance_data_buf; }; boost::container::static_vector vs_inputs{}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index e12c626d..536167ff 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -887,7 +887,10 @@ struct Liverpool { IndexBufferType index_buffer_type; INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2); u32 enable_primitive_id; - INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1); + INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1); + u32 vgt_instance_step_rate_0; + u32 vgt_instance_step_rate_1; + INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1); PolygonOffset poly_offset; INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); AaConfig aa_config; @@ -1046,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); +static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); +static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7b7eda44..01901836 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul stages[i] = *infos[i]; } BuildDescSetLayout(); + + const vk::PushConstantRange push_constants = { + .stageFlags = vk::ShaderStageFlagBits::eVertex, + .offset = 0, + .size = 2 * sizeof(u32), + }; + const vk::DescriptorSetLayout set_layout = *desc_layout; const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1U, .pSetLayouts = &set_layout, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constants, }; pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); @@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector attributes; const auto& vs_info = stages[0]; for (const auto& input : vs_info.vs_inputs) { + if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || + input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + // Skip attribute binding as the data will be pulled by shader + continue; + } + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); attributes.push_back({ .location = input.binding, @@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const { // Calculate buffers memory overlaps boost::container::static_vector ranges{}; for (const auto& input : vs_info.vs_inputs) { + if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || + input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + continue; + } + const auto& buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); if (buffer.GetSize() == 0) { continue; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 060a2695..ab8be78f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -67,20 +67,24 @@ public: void BindResources(Core::MemoryManager* memory, StreamBuffer& staging, VideoCore::TextureCache& texture_cache) const; - [[nodiscard]] vk::Pipeline Handle() const noexcept { + vk::Pipeline Handle() const noexcept { return *pipeline; } - [[nodiscard]] bool IsEmbeddedVs() const noexcept { + vk::PipelineLayout GetLayout() const { + return *pipeline_layout; + } + + bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; return key.stage_hashes[0] == EmbeddedVsHash; } - [[nodiscard]] auto GetWriteMasks() const { + auto GetWriteMasks() const { return key.write_masks; } - [[nodiscard]] bool IsDepthEnabled() const { + bool IsDepthEnabled() const { return key.depth.depth_enable.Value(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6440ebc7..f7059f17 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { UpdateDynamicState(*pipeline); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + + const u32 step_rates[] = { + regs.vgt_instance_step_rate_0, + regs.vgt_instance_step_rate_1, + }; + cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u, + sizeof(step_rates), &step_rates); if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else {