amdgpu: added support for several single GFX submits per frame

This commit is contained in:
psucien 2024-05-16 23:56:29 +02:00
parent f881753944
commit 9df1a8d15b
3 changed files with 69 additions and 25 deletions

View File

@ -1411,7 +1411,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
}
}
liverpool->Submit(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
liverpool->SubmitGfx(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
return ORBIS_OK;
}

View File

@ -9,16 +9,50 @@
namespace AmdGpu {
Liverpool::Liverpool() = default;
Liverpool::Liverpool() {
process_thread = std::jthread{std::bind_front(&Liverpool::Process, this)};
}
void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
Liverpool::~Liverpool() {
process_thread.request_stop();
cv_submit.notify_one();
}
void Liverpool::Process(std::stop_token stoken) {
while (!stoken.stop_requested()) {
std::span<const u32> dcb{};
{
std::unique_lock lock{m_ring_access};
cv_submit.wait(lock, stoken, [&]() { return !gfx_ring.empty(); });
if (stoken.stop_requested()) {
break;
}
dcb = gfx_ring.front();
gfx_ring.pop();
}
ASSERT_MSG(dcb.size() != 0, "Empty command list received");
ProcessCmdList(dcb.data(), dcb.size());
cv_complete.notify_all();
}
}
void Liverpool::Wait() {
std::unique_lock lock{m_ring_access};
cv_complete.wait(lock, [this]() { return gfx_ring.empty(); });
}
void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
Common::SetCurrentThreadName("CommandProcessor_Gfx");
auto* header = reinterpret_cast<PM4Header*>(cmdbuf);
auto* header = reinterpret_cast<const PM4Header*>(cmdbuf);
u32 processed_cmd_size = 0;
while (processed_cmd_size < size_in_bytes) {
PM4Header* next_header{};
const PM4Header* next_header{};
const u32 type = header->type;
switch (type) {
case 3: {
@ -26,7 +60,7 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
const u32 count = header->type3.NumWords();
switch (opcode) {
case PM4ItOpcode::Nop: {
const auto* nop = reinterpret_cast<PM4CmdNop*>(header);
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
if (nop->header.count.Value() == 0) {
break;
}
@ -44,30 +78,30 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
break;
}
case PM4ItOpcode::SetContextReg: {
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset],
header + 2, (count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::SetShReg: {
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::SetUconfigReg: {
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset],
header + 2, (count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::IndexType: {
const auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
regs.index_buffer_type.raw = index_type->raw;
break;
}
case PM4ItOpcode::DrawIndex2: {
const auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
regs.max_index_size = draw_index->max_size;
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
@ -77,7 +111,7 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
break;
}
case PM4ItOpcode::DrawIndexAuto: {
const auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
// rasterizer->DrawIndex();
@ -88,21 +122,21 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
break;
}
case PM4ItOpcode::EventWriteEos: {
const auto* event_eos = reinterpret_cast<PM4CmdEventWriteEos*>(header);
const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
event_eos->SignalFence();
break;
}
case PM4ItOpcode::EventWriteEop: {
const auto* event_eop = reinterpret_cast<PM4CmdEventWriteEop*>(header);
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
event_eop->SignalFence();
break;
}
case PM4ItOpcode::DmaData: {
const auto* dma_data = reinterpret_cast<PM4DmaData*>(header);
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
break;
}
case PM4ItOpcode::WriteData: {
const auto* write_data = reinterpret_cast<PM4CmdWriteData*>(header);
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
const u32 data_size = (header->type3.count.Value() - 2) * 4;
if (!write_data->wr_one_addr.Value()) {
@ -117,7 +151,7 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
break;
}
case PM4ItOpcode::WaitRegMem: {
const auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(header);
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
while (!wait_reg_mem->Test()) {
using namespace std::chrono_literals;

View File

@ -11,6 +11,8 @@
#include <condition_variable>
#include <functional>
#include <future>
#include <span>
#include <queue>
namespace AmdGpu {
@ -614,23 +616,31 @@ struct Liverpool {
public:
Liverpool();
~Liverpool();
void Submit(u32* cmdbuf, u32 size_in_bytes) {
ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending");
cp = std::async(&Liverpool::ProcessCmdList, this, cmdbuf, size_in_bytes);
void SubmitGfx(const u32* dcb, u32 dcb_size) {
{
std::scoped_lock lock{m_ring_access};
gfx_ring.push({dcb, dcb_size});
}
cv_submit.notify_one();
}
void SubmitDone() {
// This is wrong as `submitDone()` should never be blocking. The behavior will be
// reworked with mutiple queues introduction
if (cp.valid()) {
cp.get();
}
Wait();
}
private:
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
void ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes);
void Process(std::stop_token stoken);
void Wait();
std::future<void> cp{};
std::jthread process_thread{};
std::queue<std::span<const u32>> gfx_ring{};
std::condition_variable_any cv_submit{};
std::condition_variable cv_complete{};
std::mutex m_ring_access{};
};
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);