amdgpu: `wait_reg_mem` and `write_data` implementation

Command list parsing is temporarily moved to an async task
This commit is contained in:
psucien 2024-05-09 22:59:35 +02:00
parent bfb18135fb
commit 8e0c67f12e
5 changed files with 103 additions and 11 deletions

View File

@ -27,7 +27,7 @@ template <u32 data_block_size>
/// Writes a NOP packet at `cmdbuf` and returns the position right after it.
/// The header is built with a count of `data_block_size - 1`; the returned pointer skips
/// one header dword plus `data_block_size` payload dwords.
static inline u32* WriteTrailingNop(u32* cmdbuf) {
    auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
    nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
    // A single store is enough; the remaining payload dwords are intentionally left as-is.
    nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
    return cmdbuf + data_block_size + 1 /* header */;
}
@ -48,9 +48,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
kernel_event.event.udata = udata;
eq->addEvent(kernel_event);
liverpool->eop_callback = [=]() {
eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr);
};
liverpool->SetEopCallback(
[=]() { eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); });
return ORBIS_OK;
}
@ -82,7 +81,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add
wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
wait_reg_mem->ref = ref;
wait_reg_mem->mask = mask;
wait_reg_mem->poll_interval = 10;
wait_reg_mem->poll_interval = 10u;
WriteTrailingNop<2>(cmdbuf + 7);
return ORBIS_OK;
@ -652,10 +651,10 @@ s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle,
auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
wait_reg_mem->function.Assign(3u);
wait_reg_mem->mem_space.Assign(1u);
wait_reg_mem->function.Assign(PM4CmdWaitRegMem::Function::Equal);
wait_reg_mem->mem_space.Assign(PM4CmdWaitRegMem::MemSpace::Memory);
*reinterpret_cast<uintptr_t*>(&wait_reg_mem->poll_addr_lo) =
(label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu;
(label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull;
wait_reg_mem->ref = 0u;
wait_reg_mem->mask = 0xffff'ffffu;
wait_reg_mem->poll_interval = 10u;
@ -1303,7 +1302,7 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
write_lock->dst_sel.Assign(5u);
*reinterpret_cast<uintptr_t*>(&write_lock->dst_addr_lo) =
(label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu;
(label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull;
write_lock->data[0] = 1;
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf + 5);
@ -1405,7 +1404,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
}
}
liverpool->ProcessCmdList(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
liverpool->Submit(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
return ORBIS_OK;
}
@ -1416,7 +1415,10 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
}
/// Guest-facing entry point that forwards to `liverpool->SubmitDone()`.
/// @return Always ORBIS_OK.
int PS4_SYSV_ABI sceGnmSubmitDone() {
    // The call is no longer a stub, so it is reported once at info level instead of
    // being flagged as a "(STUBBED)" error.
    LOG_INFO(Lib_GnmDriver, "called");
    liverpool->SubmitDone();
    return ORBIS_OK;
}

View File

@ -196,6 +196,9 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
reinterpret_cast<void*>(req.flip_arg));
}
}
// Reset flip label
req.port->buffer_labels[req.index] = 0;
}
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {

View File

@ -117,6 +117,14 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
}
case PM4ItOpcode::WriteData: {
const auto* write_data = reinterpret_cast<PM4CmdWriteData*>(header);
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
const u32 data_size = (header->type3.count.Value() - 2) * 4;
if (!write_data->wr_one_addr.Value()) {
std::memcpy(reinterpret_cast<void*>(write_data->Address()), write_data->data,
data_size);
} else {
UNREACHABLE();
}
break;
}
case PM4ItOpcode::AcquireMem: {
@ -125,6 +133,13 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
}
case PM4ItOpcode::WaitRegMem: {
const auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(header);
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
ASSERT(wait_reg_mem->function.Value() == PM4CmdWaitRegMem::Function::Equal);
{
std::unique_lock lock{m_reg_mem};
cv_reg_mem.wait(lock, [&]() { return wait_reg_mem->Test(); });
}
break;
}
default:

View File

@ -3,11 +3,14 @@
#pragma once
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include <array>
#include <condition_variable>
#include <functional>
#include <future>
namespace AmdGpu {
@ -612,9 +615,26 @@ struct Liverpool {
public:
Liverpool();
/// Hands a command list to `ProcessCmdList` as an asynchronous task and stores the
/// resulting future in `cp`.
/// @param cmdbuf        Pointer to the command list, forwarded unchanged.
/// @param size_in_bytes Size of the command list, forwarded unchanged.
/// Asserts if `cp` still holds the state of an earlier submission.
void Submit(u32* cmdbuf, u32 size_in_bytes) {
    ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending");
    // Ask for std::launch::async explicitly: the default policy also allows deferred
    // execution, in which case parsing would only start inside a later `cp.get()`.
    cp = std::async(std::launch::async, &Liverpool::ProcessCmdList, this, cmdbuf,
                    size_in_bytes);
}
/// Waits for the task stored in `cp` (if any) to finish and releases its state.
void SubmitDone() {
    // This is wrong as `submitDone()` should never be blocking. The behavior will be
    // reworked with the introduction of multiple queues.
    // Calling get() on a future without shared state is undefined behavior, so a call
    // that was not preceded by Submit() is treated as a no-op.
    if (cp.valid()) {
        cp.get();
    }
}
/// Replaces the stored `eop_callback` with a copy of `cb`.
/// @param cb Callable (or empty callable) convertible to `std::function<void(void)>`.
void SetEopCallback(auto const& cb) {
    eop_callback = std::function<void(void)>(cb);
}
private:
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
std::function<void(void)> eop_callback{};
std::future<void> cp{};
std::condition_variable cv_reg_mem{};
std::mutex m_reg_mem{};
};
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);

View File

@ -320,6 +320,19 @@ struct PM4DmaData {
};
struct PM4CmdWaitRegMem {
// Values compared against the packet's `engine` field.
// NOTE(review): Me/Pfp presumably name the micro engine and the prefetch parser - confirm.
enum Engine : u32 { Me = 0u, Pfp = 1u };
// Values assigned to the packet's `mem_space` field.
// NOTE(review): presumably selects whether a register or a memory location is polled -
// confirm against the PM4 packet documentation.
enum MemSpace : u32 { Register = 0u, Memory = 1u };
// Comparison selector held in the 3-bit `function` field. Test() switches on it and
// compares the masked polled value against `ref` with the operator named here.
enum Function : u32 {
Always = 0u,
LessThan = 1u,
LessThanEqual = 2u,
Equal = 3u,
NotEqual = 4u,
GreaterThanEqual = 5u,
GreaterThan = 6u,
// Not a valid selector: Test() treats it as unreachable.
Reserved = 7u
};
PM4Type3Header header;
union {
BitField<0, 3, u32> function;
@ -332,6 +345,41 @@ struct PM4CmdWaitRegMem {
u32 ref;
u32 mask;
u32 poll_interval;
/// Combines `poll_addr_hi` (upper 32 bits) and `poll_addr_lo` (lower 32 bits) into the
/// pointer that is polled.
u32* Address() const {
    const uintptr_t upper = uintptr_t(poll_addr_hi) << 32;
    const uintptr_t combined = upper | poll_addr_lo;
    return reinterpret_cast<u32*>(combined);
}
bool Test() const {
switch (function.Value()) {
case Function::Always: {
return true;
}
case Function::LessThan: {
return (*Address() & mask) < ref;
}
case Function::LessThanEqual: {
return (*Address() & mask) <= ref;
}
case Function::Equal: {
return (*Address() & mask) == ref;
}
case Function::NotEqual: {
return (*Address() & mask) != ref;
}
case Function::GreaterThanEqual: {
return (*Address() & mask) >= ref;
}
case Function::GreaterThan: {
return (*Address() & mask) > ref;
}
case Function::Reserved:
[[fallthrough]];
default: {
UNREACHABLE();
}
}
}
};
struct PM4CmdWriteData {
@ -346,6 +394,10 @@ struct PM4CmdWriteData {
u32 dst_addr_lo;
u32 dst_addr_hi;
u32 data[0];
/// Combines `dst_addr_hi` (upper 32 bits) and `dst_addr_lo` (lower 32 bits) into the
/// destination address as an integer.
uintptr_t Address() const {
    const uintptr_t upper = uintptr_t(dst_addr_hi) << 32;
    return upper | dst_addr_lo;
}
};
} // namespace AmdGpu