Merge pull request #637 from shadps4-emu/amdgpu/indirect_dispatch
video_core: added support for indirect dispatches (gfx only)
This commit is contained in:
commit
83e343f77e
|
@ -383,6 +383,22 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::DispatchIndirect: {
|
||||||
|
const auto* dispatch_indirect =
|
||||||
|
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
||||||
|
const auto offset = dispatch_indirect->data_offset;
|
||||||
|
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||||
|
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||||
|
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||||
|
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||||
|
rasterizer->ScopeMarkerBegin(
|
||||||
|
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
|
||||||
|
rasterizer->Breadcrumb(u64(cmd_address));
|
||||||
|
rasterizer->DispatchIndirect(ib_address, offset, size);
|
||||||
|
rasterizer->ScopeMarkerEnd();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::NumInstances: {
|
case PM4ItOpcode::NumInstances: {
|
||||||
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
|
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
|
||||||
regs.num_instances.num_instances = num_instances->num_instances;
|
regs.num_instances.num_instances = num_instances->num_instances;
|
||||||
|
@ -399,6 +415,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
regs.num_indices = index_size->num_indices;
|
regs.num_indices = index_size->num_indices;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::SetBase: {
|
||||||
|
const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
|
||||||
|
ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
|
||||||
|
mapped_queues[GfxQueueId].indirect_args_addr = set_base->Address<u64>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::EventWrite: {
|
case PM4ItOpcode::EventWrite: {
|
||||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -1127,6 +1127,7 @@ private:
|
||||||
std::vector<u32> ccb_buffer;
|
std::vector<u32> ccb_buffer;
|
||||||
std::queue<Task::Handle> submits{};
|
std::queue<Task::Handle> submits{};
|
||||||
ComputeProgram cs_state{};
|
ComputeProgram cs_state{};
|
||||||
|
VAddr indirect_args_addr{};
|
||||||
};
|
};
|
||||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||||
|
|
||||||
|
|
|
@ -704,4 +704,40 @@ struct PM4CmdReleaseMem {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PM4CmdSetBase {
|
||||||
|
enum class BaseIndex : u32 {
|
||||||
|
DisplayListPatchTable = 0b0000,
|
||||||
|
DrawIndexIndirPatchTable = 0b0001,
|
||||||
|
GdsPartition = 0b0010,
|
||||||
|
CePartition = 0b0011,
|
||||||
|
};
|
||||||
|
|
||||||
|
PM4Type3Header header;
|
||||||
|
union {
|
||||||
|
BitField<0, 4, BaseIndex> base_index;
|
||||||
|
u32 dw1;
|
||||||
|
};
|
||||||
|
u32 address0;
|
||||||
|
u32 address1;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Address() const {
|
||||||
|
ASSERT(base_index == BaseIndex::DisplayListPatchTable ||
|
||||||
|
base_index == BaseIndex::DrawIndexIndirPatchTable);
|
||||||
|
return reinterpret_cast<T>(address0 | (u64(address1 & 0xffff) << 32u));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PM4CmdDispatchIndirect {
|
||||||
|
struct GroupDimensions {
|
||||||
|
u32 dim_x;
|
||||||
|
u32 dim_y;
|
||||||
|
u32 dim_z;
|
||||||
|
};
|
||||||
|
|
||||||
|
PM4Type3Header header;
|
||||||
|
u32 data_offset; ///< Byte aligned offset where the required data structure starts
|
||||||
|
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
|
@ -17,7 +17,8 @@ constexpr vk::BufferUsageFlags AllFlags =
|
||||||
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
|
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
|
||||||
vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer |
|
vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer |
|
||||||
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
|
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
|
||||||
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer;
|
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
|
||||||
|
vk::BufferUsageFlagBits::eIndirectBuffer;
|
||||||
|
|
||||||
std::string_view BufferTypeName(MemoryUsage type) {
|
std::string_view BufferTypeName(MemoryUsage type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
|
|
@ -90,6 +90,45 @@ void Rasterizer::DispatchDirect() {
|
||||||
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Records a compute dispatch whose arguments are read by the GPU from a buffer.
///
/// @param address Base guest address of the indirect arguments.
/// @param offset  Byte offset from `address` at which the dispatch arguments start.
/// @param size    Size in bytes of the dispatch arguments.
///
/// Nothing is recorded when no compute pipeline is available or when the pipeline reports
/// that it has no resources to bind.
void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
    RENDERER_TRACE;

    const auto cmdbuf = scheduler.CommandBuffer();
    const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
    if (!pipeline) {
        return;
    }

    try {
        const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache);
        if (!has_resources) {
            return;
        }
    } catch (...) {
        UNREACHABLE();
    }

    scheduler.EndRendering();
    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());

    // Ask the cache for the range the arguments actually occupy, i.e. starting at
    // `address + offset`. Requesting `[address, address + size)` and adding `offset` afterwards
    // would leave the bytes consumed by the dispatch outside the requested range whenever
    // `offset` is non-zero.
    // NOTE(review): the third argument is kept as `true` from the original call; the arguments
    // are only read here, so confirm that flag is intended.
    const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, true);
    const auto args_offset = base;

    // Emulate PFP-to-ME sync packet
    const vk::BufferMemoryBarrier ib_barrier{
        .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
        .dstAccessMask = vk::AccessFlagBits::eIndirectCommandRead,
        .buffer = buffer->Handle(),
        .offset = args_offset,
        .size = size,
    };
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                           vk::PipelineStageFlagBits::eDrawIndirect,
                           vk::DependencyFlagBits::eByRegion, {}, ib_barrier, {});

    cmdbuf.dispatchIndirect(buffer->Handle(), args_offset);
}
|
||||||
|
|
||||||
u64 Rasterizer::Flush() {
|
u64 Rasterizer::Flush() {
|
||||||
const u64 current_tick = scheduler.CurrentTick();
|
const u64 current_tick = scheduler.CurrentTick();
|
||||||
SubmitInfo info{};
|
SubmitInfo info{};
|
||||||
|
|
|
@ -34,6 +34,7 @@ public:
|
||||||
void Draw(bool is_indexed, u32 index_offset = 0);
|
void Draw(bool is_indexed, u32 index_offset = 0);
|
||||||
|
|
||||||
void DispatchDirect();
|
void DispatchDirect();
|
||||||
|
void DispatchIndirect(VAddr address, u32 offset, u32 size);
|
||||||
|
|
||||||
void ScopeMarkerBegin(const std::string_view& str);
|
void ScopeMarkerBegin(const std::string_view& str);
|
||||||
void ScopeMarkerEnd();
|
void ScopeMarkerEnd();
|
||||||
|
|
Loading…
Reference in New Issue