diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 8570a290..e61f8cec 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -383,6 +383,22 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + const auto offset = dispatch_indirect->data_offset; + const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; + const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); + if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin( + fmt::format("dcb:{}:DispatchIndirect", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); + rasterizer->DispatchIndirect(ib_address, offset, size); + rasterizer->ScopeMarkerEnd(); + } + break; + } case PM4ItOpcode::NumInstances: { const auto* num_instances = reinterpret_cast(header); regs.num_instances.num_instances = num_instances->num_instances; @@ -399,6 +415,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spannum_indices; break; } + case PM4ItOpcode::SetBase: { + const auto* set_base = reinterpret_cast(header); + ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable); + mapped_queues[GfxQueueId].indirect_args_addr = set_base->Address(); + break; + } case PM4ItOpcode::EventWrite: { // const auto* event = reinterpret_cast(header); break; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 14284bbc..7b38ca79 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1127,6 +1127,7 @@ private: std::vector ccb_buffer; std::queue submits{}; ComputeProgram cs_state{}; + VAddr indirect_args_addr{}; }; std::array mapped_queues{}; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 5ab233fd..50e4c93a 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -704,4 +704,40 @@ struct PM4CmdReleaseMem { } }; +struct PM4CmdSetBase { + enum class BaseIndex : u32 { + DisplayListPatchTable = 0b0000, + DrawIndexIndirPatchTable = 0b0001, + GdsPartition = 0b0010, + CePartition = 0b0011, + }; + + PM4Type3Header header; + union { + BitField<0, 4, BaseIndex> base_index; + u32 dw1; + }; + u32 address0; + u32 address1; + + template + T Address() const { + ASSERT(base_index == BaseIndex::DisplayListPatchTable || + base_index == BaseIndex::DrawIndexIndirPatchTable); + return reinterpret_cast(address0 | (u64(address1 & 0xffff) << 32u)); + } +}; + +struct PM4CmdDispatchIndirect { + struct GroupDimensions { + u32 dim_x; + u32 dim_y; + u32 dim_z; + }; + + PM4Type3Header header; + u32 data_offset; ///< Byte aligned offset where the required data structure starts + u32 dispatch_initiator; ///< Dispatch Initiator Register +}; + } // namespace AmdGpu diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index d112864d..e9dceb62 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -17,7 +17,8 @@ constexpr vk::BufferUsageFlags AllFlags = vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer | vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | - vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer; + vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eIndirectBuffer; std::string_view BufferTypeName(MemoryUsage type) { switch (type) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f22f19ea..9231c510 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -90,6 +90,45 @@ void Rasterizer::DispatchDirect() { cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z); } +void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { + RENDERER_TRACE; + + const auto cmdbuf = scheduler.CommandBuffer(); + const auto& cs_program = liverpool->regs.cs_program; + const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); + if (!pipeline) { + return; + } + + try { + const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache); + if (!has_resources) { + return; + } + } catch (...) { + UNREACHABLE(); + } + + scheduler.EndRendering(); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); + const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true); + const auto total_offset = base + offset; + + // Emulate PFP-to-ME sync packet + const vk::BufferMemoryBarrier ib_barrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eIndirectCommandRead, + .buffer = buffer->Handle(), + .offset = total_offset, + .size = size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eDrawIndirect, + vk::DependencyFlagBits::eByRegion, {}, ib_barrier, {}); + + cmdbuf.dispatchIndirect(buffer->Handle(), total_offset); +} + u64 Rasterizer::Flush() { const u64 current_tick = scheduler.CurrentTick(); SubmitInfo info{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index a151ebc2..34f6ae72 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -34,6 +34,7 @@ public: void Draw(bool is_indexed, u32 index_offset = 0); void DispatchDirect(); + void DispatchIndirect(VAddr address, u32 offset, u32 size); void ScopeMarkerBegin(const std::string_view& str); void ScopeMarkerEnd();