renderer: added support for instance step rates
This commit is contained in:
parent
986ed0662c
commit
cfbe8b9e6d
|
@ -135,15 +135,33 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
if (IR::IsParam(attr)) {
|
||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||
const auto& param{ctx.input_params.at(index)};
|
||||
if (!ValidId(param.id)) {
|
||||
// Attribute is disabled or varying component is not written
|
||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||
}
|
||||
if (param.num_components > 1) {
|
||||
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||
return ctx.OpLoad(param.component_type, pointer);
|
||||
if (param.buffer_handle < 0) {
|
||||
if (!ValidId(param.id)) {
|
||||
// Attribute is disabled or varying component is not written
|
||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||
}
|
||||
|
||||
if (param.num_components > 1) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||
return ctx.OpLoad(param.component_type, pointer);
|
||||
} else {
|
||||
return ctx.OpLoad(param.component_type, param.id);
|
||||
}
|
||||
} else {
|
||||
return ctx.OpLoad(param.component_type, param.id);
|
||||
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
|
||||
const auto step_rate = ctx.OpLoad(
|
||||
ctx.U32[1],
|
||||
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
||||
ctx.instance_step_rates, rate_idx));
|
||||
const auto offset = ctx.OpIAdd(
|
||||
ctx.U32[1],
|
||||
ctx.OpIMul(
|
||||
ctx.U32[1],
|
||||
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
|
||||
ctx.ConstU32(param.num_components)),
|
||||
ctx.ConstU32(comp));
|
||||
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
|
||||
}
|
||||
}
|
||||
switch (attr) {
|
||||
|
|
|
@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
|||
|
||||
void EmitContext::DefineInputs(const Info& info) {
|
||||
switch (stage) {
|
||||
case Stage::Vertex:
|
||||
case Stage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||
|
||||
// Create push constants block for instance steps rates
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "sr0");
|
||||
MemberName(struct_type, 1, "sr1");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||
Name(instance_step_rates, "step_rates");
|
||||
interfaces.push_back(instance_step_rates);
|
||||
|
||||
for (const auto& input : info.vs_inputs) {
|
||||
const Id type{GetAttributeType(*this, input.fmt)};
|
||||
const Id id{DefineInput(type, input.binding)};
|
||||
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
||||
interfaces.push_back(id);
|
||||
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||
|
||||
const u32 rate_idx =
|
||||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
|
||||
: 1;
|
||||
// Note that we pass index rather than Id
|
||||
input_params[input.binding] = {
|
||||
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
|
||||
};
|
||||
} else {
|
||||
Id id{DefineInput(type, input.binding)};
|
||||
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
|
||||
Name(id, fmt::format("vs_instance_attr{}", input.binding));
|
||||
} else {
|
||||
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
||||
}
|
||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
if (info.uses_group_quad) {
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
|
@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) {
|
|||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
const auto name =
|
||||
fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||
buffer.is_instance_data
|
||||
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
|
||||
sizeof(float) * CHAR_BIT)
|
||||
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||
Name(struct_type, name);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
|
|
|
@ -165,6 +165,8 @@ public:
|
|||
|
||||
Id output_position{};
|
||||
Id vertex_index{};
|
||||
Id instance_id{};
|
||||
Id instance_step_rates{};
|
||||
Id base_vertex{};
|
||||
Id frag_coord{};
|
||||
Id front_facing{};
|
||||
|
@ -214,6 +216,7 @@ public:
|
|||
Id pointer_type;
|
||||
Id component_type;
|
||||
u32 num_components;
|
||||
s32 buffer_handle{-1};
|
||||
};
|
||||
std::array<SpirvAttribute, 32> input_params{};
|
||||
std::array<SpirvAttribute, 32> output_params{};
|
||||
|
|
|
@ -235,9 +235,21 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
ir.SetVectorReg(dst_reg++, comp);
|
||||
}
|
||||
|
||||
if (attrib.instance_data == 2 || attrib.instance_data == 3) {
|
||||
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
|
||||
attrib.instance_data);
|
||||
// In case of programmable step rates we need to fallback to instance data pulling in
|
||||
// shader, so VBs should be bound as regular data buffers
|
||||
s32 instance_buf_handle = -1;
|
||||
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
|
||||
if (step_rate == Info::VsInput::OverStepRate0 ||
|
||||
step_rate == Info::VsInput::OverStepRate1) {
|
||||
info.buffers.push_back({
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
.dword_offset = attrib.dword_offset,
|
||||
.stride = buffer.GetStride(),
|
||||
.num_records = buffer.num_records,
|
||||
.used_types = IR::Type::F32,
|
||||
.is_instance_data = true,
|
||||
});
|
||||
instance_buf_handle = s32(info.buffers.size() - 1);
|
||||
}
|
||||
|
||||
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
||||
|
@ -247,7 +259,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
.dword_offset = attrib.dword_offset,
|
||||
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
|
||||
.instance_step_rate = step_rate,
|
||||
.instance_data_buf = instance_buf_handle,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -77,7 +77,8 @@ struct BufferResource {
|
|||
u32 num_records;
|
||||
IR::Type used_types;
|
||||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_storage;
|
||||
bool is_storage{false};
|
||||
bool is_instance_data{false};
|
||||
|
||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||
};
|
||||
|
@ -116,6 +117,7 @@ struct Info {
|
|||
u8 sgpr_base;
|
||||
u8 dword_offset;
|
||||
InstanceIdType instance_step_rate;
|
||||
s32 instance_data_buf;
|
||||
};
|
||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||
|
||||
|
|
|
@ -887,7 +887,10 @@ struct Liverpool {
|
|||
IndexBufferType index_buffer_type;
|
||||
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
||||
u32 enable_primitive_id;
|
||||
INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1);
|
||||
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
|
||||
u32 vgt_instance_step_rate_0;
|
||||
u32 vgt_instance_step_rate_1;
|
||||
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
|
||||
PolygonOffset poly_offset;
|
||||
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
|
||||
AaConfig aa_config;
|
||||
|
@ -1046,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
|||
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
|
||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
||||
|
|
|
@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
stages[i] = *infos[i];
|
||||
}
|
||||
BuildDescSetLayout();
|
||||
|
||||
const vk::PushConstantRange push_constants = {
|
||||
.stageFlags = vk::ShaderStageFlagBits::eVertex,
|
||||
.offset = 0,
|
||||
.size = 2 * sizeof(u32),
|
||||
};
|
||||
|
||||
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = 1U,
|
||||
.pSetLayouts = &set_layout,
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = nullptr,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &push_constants,
|
||||
};
|
||||
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||
|
||||
|
@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
|
||||
const auto& vs_info = stages[0];
|
||||
for (const auto& input : vs_info.vs_inputs) {
|
||||
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||
// Skip attribute binding as the data will be pulled by shader
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
attributes.push_back({
|
||||
.location = input.binding,
|
||||
|
@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
|
|||
// Calculate buffers memory overlaps
|
||||
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
|
||||
for (const auto& input : vs_info.vs_inputs) {
|
||||
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
|
|
|
@ -67,20 +67,24 @@ public:
|
|||
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||
VideoCore::TextureCache& texture_cache) const;
|
||||
|
||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
||||
vk::Pipeline Handle() const noexcept {
|
||||
return *pipeline;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
|
||||
vk::PipelineLayout GetLayout() const {
|
||||
return *pipeline_layout;
|
||||
}
|
||||
|
||||
bool IsEmbeddedVs() const noexcept {
|
||||
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||
return key.stage_hashes[0] == EmbeddedVsHash;
|
||||
}
|
||||
|
||||
[[nodiscard]] auto GetWriteMasks() const {
|
||||
auto GetWriteMasks() const {
|
||||
return key.write_masks;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsDepthEnabled() const {
|
||||
bool IsDepthEnabled() const {
|
||||
return key.depth.depth_enable.Value();
|
||||
}
|
||||
|
||||
|
|
|
@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
|||
UpdateDynamicState(*pipeline);
|
||||
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||
|
||||
const u32 step_rates[] = {
|
||||
regs.vgt_instance_step_rate_0,
|
||||
regs.vgt_instance_step_rate_1,
|
||||
};
|
||||
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
|
||||
sizeof(step_rates), &step_rates);
|
||||
if (is_indexed) {
|
||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue