diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index b5dca74b..659c0a5f 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1411,7 +1411,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], } } - liverpool->Submit(reinterpret_cast(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]); + liverpool->SubmitGfx(reinterpret_cast(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]); return ORBIS_OK; } diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 62e12fa2..03e93eb9 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -9,16 +9,50 @@ namespace AmdGpu { -Liverpool::Liverpool() = default; +Liverpool::Liverpool() { + process_thread = std::jthread{std::bind_front(&Liverpool::Process, this)}; +} -void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { +Liverpool::~Liverpool() { + process_thread.request_stop(); + cv_submit.notify_one(); +} + +void Liverpool::Process(std::stop_token stoken) { + while (!stoken.stop_requested()) { + std::span dcb{}; + { + std::unique_lock lock{m_ring_access}; + cv_submit.wait(lock, stoken, [&]() { return !gfx_ring.empty(); }); + + if (stoken.stop_requested()) { + break; + } + + dcb = gfx_ring.front(); + gfx_ring.pop(); + } + + ASSERT_MSG(dcb.size() != 0, "Empty command list received"); + ProcessCmdList(dcb.data(), dcb.size()); + + cv_complete.notify_all(); + } +} + +void Liverpool::Wait() { + std::unique_lock lock{m_ring_access}; + cv_complete.wait(lock, [this]() { return gfx_ring.empty(); }); +} + +void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { Common::SetCurrentThreadName("CommandProcessor_Gfx"); - auto* header = reinterpret_cast(cmdbuf); + auto* header = reinterpret_cast(cmdbuf); u32 processed_cmd_size = 0; while (processed_cmd_size < size_in_bytes) { - PM4Header* next_header{}; + const PM4Header* next_header{}; const u32 type = header->type; switch (type) { case 3: { @@ -26,7 +60,7 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { const u32 count = header->type3.NumWords(); switch (opcode) { case PM4ItOpcode::Nop: { - const auto* nop = reinterpret_cast(header); + const auto* nop = reinterpret_cast(header); if (nop->header.count.Value() == 0) { break; } @@ -44,30 +78,30 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; } case PM4ItOpcode::SetContextReg: { - const auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(®s.reg_array[ContextRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetShReg: { - const auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetUconfigReg: { - const auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::IndexType: { - const auto* index_type = reinterpret_cast(header); + const auto* index_type = reinterpret_cast(header); regs.index_buffer_type.raw = index_type->raw; break; } case PM4ItOpcode::DrawIndex2: { - const auto* draw_index = reinterpret_cast(header); + const auto* draw_index = reinterpret_cast(header); regs.max_index_size = draw_index->max_size; regs.index_base_address.base_addr_lo = draw_index->index_base_lo; regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); @@ -77,7 +111,7 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; } case PM4ItOpcode::DrawIndexAuto: { - const auto* draw_index = reinterpret_cast(header); + const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; // rasterizer->DrawIndex(); @@ -88,21 +122,21 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; } case PM4ItOpcode::EventWriteEos: { - const auto* event_eos = reinterpret_cast(header); + const auto* event_eos = reinterpret_cast(header); event_eos->SignalFence(); break; } case PM4ItOpcode::EventWriteEop: { - const auto* event_eop = reinterpret_cast(header); + const auto* event_eop = reinterpret_cast(header); event_eop->SignalFence(); break; } case PM4ItOpcode::DmaData: { - const auto* dma_data = reinterpret_cast(header); + const auto* dma_data = reinterpret_cast(header); break; } case PM4ItOpcode::WriteData: { - const auto* write_data = reinterpret_cast(header); + const auto* write_data = reinterpret_cast(header); ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); const u32 data_size = (header->type3.count.Value() - 2) * 4; if (!write_data->wr_one_addr.Value()) { @@ -117,7 +151,7 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; } case PM4ItOpcode::WaitRegMem: { - const auto* wait_reg_mem = reinterpret_cast(header); + const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); while (!wait_reg_mem->Test()) { using namespace std::chrono_literals; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 01c20c01..c040a2ff 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include namespace AmdGpu { @@ -614,23 +616,31 @@ struct Liverpool { public: Liverpool(); + ~Liverpool(); - void Submit(u32* cmdbuf, u32 size_in_bytes) { - ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending"); - cp = std::async(&Liverpool::ProcessCmdList, this, cmdbuf, size_in_bytes); + void SubmitGfx(const u32* dcb, u32 dcb_size) { + { + std::scoped_lock lock{m_ring_access}; + gfx_ring.push({dcb, dcb_size}); + } + cv_submit.notify_one(); } void SubmitDone() { // This is wrong as `submitDone()` should never be blocking. The behavior will be // reworked with mutiple queues introduction - if (cp.valid()) { - cp.get(); - } + Wait(); } private: - void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes); + void ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes); + void Process(std::stop_token stoken); + void Wait(); - std::future cp{}; + std::jthread process_thread{}; + std::queue> gfx_ring{}; + std::condition_variable_any cv_submit{}; + std::condition_variable cv_complete{}; + std::mutex m_ring_access{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);