renderer: added support for instance step rates
This commit is contained in:
parent
986ed0662c
commit
cfbe8b9e6d
|
@ -135,16 +135,34 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
const auto& param{ctx.input_params.at(index)};
|
const auto& param{ctx.input_params.at(index)};
|
||||||
|
if (param.buffer_handle < 0) {
|
||||||
if (!ValidId(param.id)) {
|
if (!ValidId(param.id)) {
|
||||||
// Attribute is disabled or varying component is not written
|
// Attribute is disabled or varying component is not written
|
||||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (param.num_components > 1) {
|
if (param.num_components > 1) {
|
||||||
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
const Id pointer{
|
||||||
|
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||||
return ctx.OpLoad(param.component_type, pointer);
|
return ctx.OpLoad(param.component_type, pointer);
|
||||||
} else {
|
} else {
|
||||||
return ctx.OpLoad(param.component_type, param.id);
|
return ctx.OpLoad(param.component_type, param.id);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
|
||||||
|
const auto step_rate = ctx.OpLoad(
|
||||||
|
ctx.U32[1],
|
||||||
|
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
||||||
|
ctx.instance_step_rates, rate_idx));
|
||||||
|
const auto offset = ctx.OpIAdd(
|
||||||
|
ctx.U32[1],
|
||||||
|
ctx.OpIMul(
|
||||||
|
ctx.U32[1],
|
||||||
|
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
|
||||||
|
ctx.ConstU32(param.num_components)),
|
||||||
|
ctx.ConstU32(comp));
|
||||||
|
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::FragCoord: {
|
case IR::Attribute::FragCoord: {
|
||||||
|
|
|
@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
||||||
|
|
||||||
void EmitContext::DefineInputs(const Info& info) {
|
void EmitContext::DefineInputs(const Info& info) {
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case Stage::Vertex:
|
case Stage::Vertex: {
|
||||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||||
|
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||||
|
|
||||||
|
// Create push constants block for instance step rates
|
||||||
|
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
|
||||||
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
|
MemberName(struct_type, 0, "sr0");
|
||||||
|
MemberName(struct_type, 1, "sr1");
|
||||||
|
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||||
|
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||||
|
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||||
|
Name(instance_step_rates, "step_rates");
|
||||||
|
interfaces.push_back(instance_step_rates);
|
||||||
|
|
||||||
for (const auto& input : info.vs_inputs) {
|
for (const auto& input : info.vs_inputs) {
|
||||||
const Id type{GetAttributeType(*this, input.fmt)};
|
const Id type{GetAttributeType(*this, input.fmt)};
|
||||||
const Id id{DefineInput(type, input.binding)};
|
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||||
|
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||||
|
|
||||||
|
const u32 rate_idx =
|
||||||
|
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
|
||||||
|
: 1;
|
||||||
|
// Note that we pass index rather than Id
|
||||||
|
input_params[input.binding] = {
|
||||||
|
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
Id id{DefineInput(type, input.binding)};
|
||||||
|
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
|
||||||
|
Name(id, fmt::format("vs_instance_attr{}", input.binding));
|
||||||
|
} else {
|
||||||
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
||||||
|
}
|
||||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case Stage::Fragment:
|
case Stage::Fragment:
|
||||||
if (info.uses_group_quad) {
|
if (info.uses_group_quad) {
|
||||||
subgroup_local_invocation_id = DefineVariable(
|
subgroup_local_invocation_id = DefineVariable(
|
||||||
|
@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) {
|
||||||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
||||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||||
const auto name =
|
const auto name =
|
||||||
fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
buffer.is_instance_data
|
||||||
|
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
|
||||||
|
sizeof(float) * CHAR_BIT)
|
||||||
|
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||||
Name(struct_type, name);
|
Name(struct_type, name);
|
||||||
Decorate(struct_type, spv::Decoration::Block);
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
MemberName(struct_type, 0, "data");
|
MemberName(struct_type, 0, "data");
|
||||||
|
|
|
@ -165,6 +165,8 @@ public:
|
||||||
|
|
||||||
Id output_position{};
|
Id output_position{};
|
||||||
Id vertex_index{};
|
Id vertex_index{};
|
||||||
|
Id instance_id{};
|
||||||
|
Id instance_step_rates{};
|
||||||
Id base_vertex{};
|
Id base_vertex{};
|
||||||
Id frag_coord{};
|
Id frag_coord{};
|
||||||
Id front_facing{};
|
Id front_facing{};
|
||||||
|
@ -214,6 +216,7 @@ public:
|
||||||
Id pointer_type;
|
Id pointer_type;
|
||||||
Id component_type;
|
Id component_type;
|
||||||
u32 num_components;
|
u32 num_components;
|
||||||
|
s32 buffer_handle{-1};
|
||||||
};
|
};
|
||||||
std::array<SpirvAttribute, 32> input_params{};
|
std::array<SpirvAttribute, 32> input_params{};
|
||||||
std::array<SpirvAttribute, 32> output_params{};
|
std::array<SpirvAttribute, 32> output_params{};
|
||||||
|
|
|
@ -235,9 +235,21 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
||||||
ir.SetVectorReg(dst_reg++, comp);
|
ir.SetVectorReg(dst_reg++, comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attrib.instance_data == 2 || attrib.instance_data == 3) {
|
// In case of programmable step rates we need to fall back to instance data pulling in
|
||||||
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
|
// shader, so VBs should be bound as regular data buffers
|
||||||
attrib.instance_data);
|
s32 instance_buf_handle = -1;
|
||||||
|
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
|
||||||
|
if (step_rate == Info::VsInput::OverStepRate0 ||
|
||||||
|
step_rate == Info::VsInput::OverStepRate1) {
|
||||||
|
info.buffers.push_back({
|
||||||
|
.sgpr_base = attrib.sgpr_base,
|
||||||
|
.dword_offset = attrib.dword_offset,
|
||||||
|
.stride = buffer.GetStride(),
|
||||||
|
.num_records = buffer.num_records,
|
||||||
|
.used_types = IR::Type::F32,
|
||||||
|
.is_instance_data = true,
|
||||||
|
});
|
||||||
|
instance_buf_handle = s32(info.buffers.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
||||||
|
@ -247,7 +259,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
||||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||||
.sgpr_base = attrib.sgpr_base,
|
.sgpr_base = attrib.sgpr_base,
|
||||||
.dword_offset = attrib.dword_offset,
|
.dword_offset = attrib.dword_offset,
|
||||||
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
|
.instance_step_rate = step_rate,
|
||||||
|
.instance_data_buf = instance_buf_handle,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,7 +77,8 @@ struct BufferResource {
|
||||||
u32 num_records;
|
u32 num_records;
|
||||||
IR::Type used_types;
|
IR::Type used_types;
|
||||||
AmdGpu::Buffer inline_cbuf;
|
AmdGpu::Buffer inline_cbuf;
|
||||||
bool is_storage;
|
bool is_storage{false};
|
||||||
|
bool is_instance_data{false};
|
||||||
|
|
||||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||||
};
|
};
|
||||||
|
@ -116,6 +117,7 @@ struct Info {
|
||||||
u8 sgpr_base;
|
u8 sgpr_base;
|
||||||
u8 dword_offset;
|
u8 dword_offset;
|
||||||
InstanceIdType instance_step_rate;
|
InstanceIdType instance_step_rate;
|
||||||
|
s32 instance_data_buf;
|
||||||
};
|
};
|
||||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||||
|
|
||||||
|
|
|
@ -887,7 +887,10 @@ struct Liverpool {
|
||||||
IndexBufferType index_buffer_type;
|
IndexBufferType index_buffer_type;
|
||||||
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
||||||
u32 enable_primitive_id;
|
u32 enable_primitive_id;
|
||||||
INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1);
|
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
|
||||||
|
u32 vgt_instance_step_rate_0;
|
||||||
|
u32 vgt_instance_step_rate_1;
|
||||||
|
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
|
||||||
PolygonOffset poly_offset;
|
PolygonOffset poly_offset;
|
||||||
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
|
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
|
||||||
AaConfig aa_config;
|
AaConfig aa_config;
|
||||||
|
@ -1046,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||||
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
||||||
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
||||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
|
||||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||||
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
||||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
||||||
|
|
|
@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
stages[i] = *infos[i];
|
stages[i] = *infos[i];
|
||||||
}
|
}
|
||||||
BuildDescSetLayout();
|
BuildDescSetLayout();
|
||||||
|
|
||||||
|
const vk::PushConstantRange push_constants = {
|
||||||
|
.stageFlags = vk::ShaderStageFlagBits::eVertex,
|
||||||
|
.offset = 0,
|
||||||
|
.size = 2 * sizeof(u32),
|
||||||
|
};
|
||||||
|
|
||||||
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
||||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||||
.setLayoutCount = 1U,
|
.setLayoutCount = 1U,
|
||||||
.pSetLayouts = &set_layout,
|
.pSetLayouts = &set_layout,
|
||||||
.pushConstantRangeCount = 0,
|
.pushConstantRangeCount = 1,
|
||||||
.pPushConstantRanges = nullptr,
|
.pPushConstantRanges = &push_constants,
|
||||||
};
|
};
|
||||||
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||||
|
|
||||||
|
@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
|
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
|
||||||
const auto& vs_info = stages[0];
|
const auto& vs_info = stages[0];
|
||||||
for (const auto& input : vs_info.vs_inputs) {
|
for (const auto& input : vs_info.vs_inputs) {
|
||||||
|
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||||
|
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||||
|
// Skip attribute binding as the data will be pulled by shader
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||||
attributes.push_back({
|
attributes.push_back({
|
||||||
.location = input.binding,
|
.location = input.binding,
|
||||||
|
@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
|
||||||
// Calculate buffers memory overlaps
|
// Calculate buffers memory overlaps
|
||||||
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
|
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
|
||||||
for (const auto& input : vs_info.vs_inputs) {
|
for (const auto& input : vs_info.vs_inputs) {
|
||||||
|
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||||
|
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||||
if (buffer.GetSize() == 0) {
|
if (buffer.GetSize() == 0) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -67,20 +67,24 @@ public:
|
||||||
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||||
VideoCore::TextureCache& texture_cache) const;
|
VideoCore::TextureCache& texture_cache) const;
|
||||||
|
|
||||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
vk::Pipeline Handle() const noexcept {
|
||||||
return *pipeline;
|
return *pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
|
vk::PipelineLayout GetLayout() const {
|
||||||
|
return *pipeline_layout;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsEmbeddedVs() const noexcept {
|
||||||
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||||
return key.stage_hashes[0] == EmbeddedVsHash;
|
return key.stage_hashes[0] == EmbeddedVsHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] auto GetWriteMasks() const {
|
auto GetWriteMasks() const {
|
||||||
return key.write_masks;
|
return key.write_masks;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsDepthEnabled() const {
|
bool IsDepthEnabled() const {
|
||||||
return key.depth.depth_enable.Value();
|
return key.depth.depth_enable.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||||
UpdateDynamicState(*pipeline);
|
UpdateDynamicState(*pipeline);
|
||||||
|
|
||||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||||
|
|
||||||
|
const u32 step_rates[] = {
|
||||||
|
regs.vgt_instance_step_rate_0,
|
||||||
|
regs.vgt_instance_step_rate_1,
|
||||||
|
};
|
||||||
|
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
|
||||||
|
sizeof(step_rates), &step_rates);
|
||||||
if (is_indexed) {
|
if (is_indexed) {
|
||||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue