Merge pull request #128 from psucien/gnm_driver/basic_sync
gnm_driver: Gnm eventq and GPU flips
This commit is contained in:
commit
b326ce5f69
|
@ -6,6 +6,8 @@
|
|||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/gnmdriver/gnmdriver.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/videoout/video_out.h"
|
||||
#include "core/platform.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pm4_cmds.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
|
@ -26,12 +28,33 @@ template <u32 data_block_size>
|
|||
static inline u32* WriteTrailingNop(u32* cmdbuf) {
|
||||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
||||
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
|
||||
nop->data_block[0] = 0; // only one out of `data_block_size` is initialized
|
||||
nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
|
||||
return cmdbuf + data_block_size + 1 /* header */;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmAddEqEvent() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
/// Subscribes the event queue `eq` to the graphics end-of-pipe (EOP) event.
///
/// @param eq    Target event queue; a null queue is rejected.
/// @param id    Event identifier; only `SceKernelEvent::Type::GfxEop` is accepted (asserted).
/// @param udata Opaque user pointer stored in the queued event.
/// @return ORBIS_KERNEL_ERROR_EBADF when `eq` is null, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
    LOG_TRACE(Lib_GnmDriver, "called");
    ASSERT_MSG(id == SceKernelEvent::Type::GfxEop);

    if (eq == nullptr) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    // Describe the event and hand it over to the queue.
    EqueueEvent graphics_event{};
    auto& desc = graphics_event.event;
    desc.ident = id;
    desc.filter = EVFILT_GRAPHICS_CORE;
    desc.flags = 1;
    desc.fflags = 0;
    desc.data = id;
    desc.udata = udata;
    eq->addEvent(graphics_event);

    // We need to convert IRQ# to event id and do proper filtering in trigger function;
    // until then any IRQ other than GfxEop is treated as a fatal error.
    const auto on_irq = [eq](Platform::InterruptId irq) {
        ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured");
        eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr);
    };
    Platform::IrqC::Instance()->Register(on_irq);
    return ORBIS_OK;
}
|
||||
|
||||
|
@ -63,7 +86,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add
|
|||
wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
|
||||
wait_reg_mem->ref = ref;
|
||||
wait_reg_mem->mask = mask;
|
||||
wait_reg_mem->poll_interval = 10;
|
||||
wait_reg_mem->poll_interval = 10u;
|
||||
|
||||
WriteTrailingNop<2>(cmdbuf + 7);
|
||||
return ORBIS_OK;
|
||||
|
@ -131,8 +154,17 @@ int PS4_SYSV_ABI sceGnmDebugHardwareStatus() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmDeleteEqEvent() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
/// Removes the graphics EOP event `id` from `eq` and drops the persistent IRQ handler.
///
/// @param eq Event queue the event was added to; a null queue is rejected.
/// @param id Event identifier; only `SceKernelEvent::Type::GfxEop` is accepted (asserted).
/// @return ORBIS_KERNEL_ERROR_EBADF when `eq` is null, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) {
    LOG_TRACE(Lib_GnmDriver, "called");
    ASSERT_MSG(id == SceKernelEvent::Type::GfxEop);

    if (eq != nullptr) {
        eq->removeEvent(id);
        Platform::IrqC::Instance()->Unregister();
        return ORBIS_OK;
    }
    return ORBIS_KERNEL_ERROR_EBADF;
}
|
||||
|
||||
|
@ -205,14 +237,14 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
|||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Unknown58>(
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(
|
||||
cmdbuf, 6); // for some reason the packet indicates larger size
|
||||
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u);
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
|
||||
cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
|
||||
} else {
|
||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
|
||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
|
||||
}
|
||||
return 0x100; // it is a size, not a retcode
|
||||
}
|
||||
|
@ -321,7 +353,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
|
|||
if constexpr (g_fair_hw_init) {
|
||||
ASSERT_MSG(0, "Not implemented");
|
||||
} else {
|
||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
|
||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
|
||||
}
|
||||
return 0x100; // it is a size, not a retcode
|
||||
}
|
||||
|
@ -335,7 +367,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
|
|||
if constexpr (g_fair_hw_init) {
|
||||
ASSERT_MSG(0, "Not implemented");
|
||||
} else {
|
||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
|
||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
|
||||
}
|
||||
return 0x100; // it is a size, not a retcode
|
||||
}
|
||||
|
@ -565,9 +597,9 @@ s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size) {
|
|||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
|
||||
if (cmdbuf && (size == 6)) {
|
||||
cmdbuf = WritePacket<PM4ItOpcode::Nop>(
|
||||
cmdbuf, PM4ShaderType::ShaderGraphics,
|
||||
static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPop), 0u, 0u, 0u, 0u);
|
||||
cmdbuf =
|
||||
WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics,
|
||||
PM4CmdNop::PayloadType::DebugMarkerPop, 0u, 0u, 0u, 0u);
|
||||
return ORBIS_OK;
|
||||
}
|
||||
return -1;
|
||||
|
@ -588,7 +620,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke
|
|||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
||||
nop->header =
|
||||
PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics};
|
||||
nop->data_block[0] = static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPush);
|
||||
nop->data_block[0] = PM4CmdNop::PayloadType::DebugMarkerPush;
|
||||
const auto marker_len = len + 1;
|
||||
std::memcpy(&nop->data_block[1], marker, marker_len);
|
||||
std::memset(reinterpret_cast<u8*>(&nop->data_block[1]) + marker_len, 0,
|
||||
|
@ -614,8 +646,24 @@ int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmInsertWaitFlipDone() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
/// Writes a WAIT_REG_MEM packet that polls the flip label of display buffer `buf_idx`
/// on video-out port `vo_handle` until it reads zero.
///
/// @param cmdbuf    Destination command buffer; must not be null.
/// @param size      Size of the reserved space in dwords; must be exactly 7.
/// @param vo_handle Video-out port handle used to look up the label array.
/// @param buf_idx   Index of the display buffer whose label is polled.
/// @return -1 on invalid arguments, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx) {
    LOG_TRACE(Lib_GnmDriver, "called");

    if (!cmdbuf || size != 7) {
        return -1;
    }

    uintptr_t label_addr{};
    VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr);

    // Labels are laid out as a contiguous array of pointer-sized slots; the two low bits
    // of the polled address are masked off.
    const u64 poll_addr = (label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull;

    auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
    wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
    wait_reg_mem->raw = 0x13u;
    // Store the address as two dwords instead of aliasing the u32 field as a uintptr_t.
    wait_reg_mem->poll_addr_lo = u32(poll_addr);
    wait_reg_mem->poll_addr_hi = u32(poll_addr >> 32u);
    wait_reg_mem->ref = 0u;
    wait_reg_mem->mask = 0xffff'ffffu;
    wait_reg_mem->poll_interval = 10u;
    return ORBIS_OK;
}
|
||||
|
||||
|
@ -824,8 +872,48 @@ int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
/// Emits the register writes that bind the built-in fullscreen-triangle vertex shader.
///
/// @param cmdbuf    Destination command buffer; must not be null.
/// @param size      Size of the reserved space in dwords; must be larger than 0x1c, the same
///                  requirement `sceGnmSetVsShader()` applies to the identical packet sequence.
/// @param shader_id Embedded shader selector; only 0 is supported.
/// @param modifier  Currently unused by this implementation.
/// @return 0x8eee00ff for an unknown `shader_id`, -1 for an invalid buffer, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier) {
    LOG_TRACE(Lib_GnmDriver, "called");

    // A fullscreen triangle with one uv set.
    // The program address is encoded as `addr >> 8`, so the blob has to be 256-byte aligned
    // for the encoded value to point at its first instruction.
    alignas(256) const static u32 shader_code[] = {
        0xbeeb03ffu, 0x00000007u, // s_mov_b32     vcc_hi, $0x00000007
        0x36020081u,              // v_and_b32     v1, 1, v0
        0x34020281u,              // v_lshlrev_b32 v1, 1, v1
        0x360000c2u,              // v_and_b32     v0, -2, v0
        0x4a0202c1u,              // v_add_i32     v1, vcc, -1, v1
        0x4a0000c1u,              // v_add_i32     v0, vcc, -1, v0
        0x7e020b01u,              // v_cvt_f32_i32 v1, v1
        0x7e040280u,              // v_cvt_f32_i32 v0, v0
        0x7e0602f2u,              // v_mov_b32     v3, 1.0
        0xf80008cfu, 0x03020001u, // exp           pos0, v1, v0, v2, v3 done
        0xf800020fu, 0x03030303u, // exp           param0, v3, v3, v3, v3
        0xbf810000u,              // s_endpgm

        // OrbShdr header
        0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
        0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u};

    const auto shader_addr = uintptr_t(&shader_code); // Original address is 0xfe000f10
    const static u32 vs_regs[] = {
        u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7};

    if (shader_id != 0) {
        return 0x8eee00ff;
    }

    // The packets below occupy the same space as the ones written by `sceGnmSetVsShader()`,
    // so the same buffer validation applies.
    if (!cmdbuf || size <= 0x1c) {
        return -1;
    }

    // Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has
    // a check for zero in the upper part of shader address. In our case, the address is a
    // pointer to host static memory, so the check will likely fail. To workaround it we will
    // repeat set shader functionality here as it is trivial. Both halves of the address are
    // written; dropping the upper part would truncate host addresses above 2^40.
    cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0],
                                     vs_regs[1]); // SPI_SHADER_PGM_LO_VS / SPI_SHADER_PGM_HI_VS
    cmdbuf =
        PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS
    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL
    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG
    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT

    WriteTrailingNop<11>(cmdbuf);
    return ORBIS_OK;
}
|
||||
|
||||
|
@ -960,6 +1048,8 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() {
|
|||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
|
||||
if (!cmdbuf || size <= 0x1c) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -987,7 +1077,6 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3
|
|||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT
|
||||
|
||||
WriteTrailingNop<11>(cmdbuf);
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
@ -1186,44 +1275,142 @@ int PS4_SYSV_ABI sceGnmSqttWaitForEvent() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
/// Submits an EOP flip request and rewrites the trailing `prepareFlip` placeholder packet of a
/// draw command buffer into the packets that lock the video-out surface and, depending on the
/// placeholder type, update a label or raise an end-of-pipe event.
///
/// @param cmdbuf    Start of the command buffer; must not be null.
/// @param size      Size of the command buffer in dwords; the placeholder is expected in the
///                  last 64 dwords, so at least 64 are required.
/// @param vo_handle Video-out port handle.
/// @param buf_idx   Display buffer index to flip to; 0xffffffff is rejected.
/// @param flip_mode Flip mode forwarded to the video-out flip request.
/// @param flip_arg  Flip argument forwarded to the video-out flip request.
/// @param unk       Unused.
/// @return ORBIS_OK on success, 0x80d11081 when the flip queue is full, otherwise the error
///         returned by the flip request.
static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf_idx, u32 flip_mode,
                                   u32 flip_arg, void* unk) {
    // Guard the pointer arithmetic below: `size - 64` would wrap around for short buffers.
    ASSERT_MSG(cmdbuf != nullptr && size >= 64,
               "Command buffer is too small to contain `prepareFlip` packet");

    // check for `prepareFlip` packet
    cmdbuf += size - 64;
    ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet");

    // Keep a copy of the placeholder arguments, the packet is overwritten in place below.
    std::array<u32, 7> backup{};
    std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type));

    ASSERT_MSG(((backup[2] & 3) == 0u) || (backup[1] != PM4CmdNop::PayloadType::PrepareFlipLabel),
               "Invalid flip packet");
    ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index");

    const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode,
                                                               flip_arg, nullptr /*unk*/);
    if (flip_result != 0) {
        if (flip_result == 0x80290012) {
            LOG_ERROR(Lib_GnmDriver, "Flip queue is full");
            return 0x80d11081;
        } else {
            LOG_ERROR(Lib_GnmDriver, "Flip request failed");
            return flip_result;
        }
    }

    uintptr_t label_addr{};
    VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr);

    // Write event to lock the VO surface
    auto* write_lock = reinterpret_cast<PM4CmdWriteData*>(cmdbuf);
    write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
    write_lock->raw = 0x500u;
    const auto addr = (label_addr + buf_idx * sizeof(label_addr)) & ~0x3ull;
    write_lock->Address<uintptr_t>(addr);
    write_lock->data[0] = 1;

    // The remainder of the placeholder becomes a NOP whose length depends on how many dwords
    // the trailing packet (if any) needs.
    auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf + 5);

    if (backup[1] == PM4CmdNop::PayloadType::PrepareFlip) {
        nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x39};
        nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;
    } else {
        if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipLabel) {
            nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x34};
            nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;

            // Write event to update label
            auto* write_label = reinterpret_cast<PM4CmdWriteData*>(cmdbuf + 0x3b);
            write_label->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
            write_label->raw = 0x500u;
            write_label->dst_addr_lo = backup[2] & 0xffff'fffcu;
            write_label->dst_addr_hi = backup[3];
            write_label->data[0] = backup[4];
        }
        if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterruptLabel) {
            nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33};
            nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;

            // End-of-pipe event that carries the address/data taken from the placeholder
            auto* write_eop = reinterpret_cast<PM4CmdEventWriteEop*>(cmdbuf + 0x3a);
            write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4};
            write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000;
            write_eop->address_lo = backup[2] & 0xffff'fffcu;
            write_eop->data_control = (backup[3] & 0xffffu) | 0x2200'0000u;
            write_eop->data_lo = backup[4];
            write_eop->data_hi = 0u;
        }
        if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterrupt) {
            nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33};
            nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;

            // End-of-pipe event with zeroed address and data fields
            auto* write_eop = reinterpret_cast<PM4CmdEventWriteEop*>(cmdbuf + 0x3a);
            write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4};
            write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000;
            write_eop->address_lo = 0u;
            write_eop->data_control = 0x100'0000u;
            write_eop->data_lo = 0u;
            write_eop->data_hi = 0u;
        }
    }

    return ORBIS_OK;
}
|
||||
|
||||
/// Patches the flip placeholder at the end of the last draw command buffer and then submits
/// all command buffers.
///
/// @param count              Number of command buffers; must be non-zero.
/// @param dcb_gpu_addrs      Array of `count` draw command buffer addresses; must not be null.
/// @param dcb_sizes_in_bytes Array of `count` draw command buffer sizes; must not be null.
/// @param ccb_gpu_addrs      Array of constant command buffer addresses (forwarded as is).
/// @param ccb_sizes_in_bytes Array of constant command buffer sizes (forwarded as is).
/// @param vo_handle          Video-out port handle.
/// @param buf_idx            Display buffer index to flip to.
/// @param flip_mode          Flip mode forwarded to the flip request.
/// @param flip_arg           Flip argument forwarded to the flip request.
/// @return 0x80d11000 on invalid arguments, the patching error if the flip request fails,
///         otherwise the result of `sceGnmSubmitCommandBuffers()`.
s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[],
                                                   u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
                                                   u32* ccb_sizes_in_bytes, u32 vo_handle,
                                                   u32 buf_idx, u32 flip_mode, u32 flip_arg) {
    LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx);

    // Validate before indexing with `count - 1`; the submit call performs the same null check,
    // but only after the arrays would already have been dereferenced here.
    if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
        LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
        return 0x80d11000;
    }
    if (count == 0) {
        LOG_ERROR(Lib_GnmDriver, "At least one DCB must be submitted");
        return 0x80d11000;
    }

    // The flip placeholder lives in the last submitted draw command buffer.
    auto* cmdbuf = reinterpret_cast<u32*>(dcb_gpu_addrs[count - 1]);
    const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4;

    const s32 patch_result =
        PatchFlipRequest(cmdbuf, size_dw, vo_handle, buf_idx, flip_mode, flip_arg, nullptr /*unk*/);
    if (patch_result != ORBIS_OK) {
        return patch_result;
    }

    return sceGnmSubmitCommandBuffers(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs,
                                      ccb_sizes_in_bytes);
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes,
|
||||
void* ccbGpuAddrs[], u32* ccbSizesInBytes) {
|
||||
s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
||||
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes) {
|
||||
LOG_INFO(Lib_GnmDriver, "called");
|
||||
ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!");
|
||||
|
||||
if (!dcbGpuAddrs || !dcbSizesInBytes) {
|
||||
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
|
||||
LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
|
||||
return 0x80d11000;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < count; i++) {
|
||||
if (dcbSizesInBytes[i] == 0) {
|
||||
if (dcb_sizes_in_bytes[i] == 0) {
|
||||
LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i);
|
||||
return 0x80d11000;
|
||||
}
|
||||
if (dcbSizesInBytes[i] > 0x3ffffc) {
|
||||
if (dcb_sizes_in_bytes[i] > 0x3ffffc) {
|
||||
LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
|
||||
dcbSizesInBytes[i]);
|
||||
dcb_sizes_in_bytes[i]);
|
||||
return 0x80d11000;
|
||||
}
|
||||
if (ccbSizesInBytes && ccbSizesInBytes[i] > 0x3ffffc) {
|
||||
if (ccb_sizes_in_bytes && ccb_sizes_in_bytes[i] > 0x3ffffc) {
|
||||
LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
|
||||
ccbSizesInBytes[i]);
|
||||
ccb_sizes_in_bytes[i]);
|
||||
return 0x80d11000;
|
||||
}
|
||||
}
|
||||
|
||||
liverpool->ProcessCmdList(reinterpret_cast<u32*>(dcbGpuAddrs[0]), dcbSizesInBytes[0]);
|
||||
liverpool->Submit(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
@ -1234,7 +1421,10 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSubmitDone() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
LOG_INFO(Lib_GnmDriver, "called");
|
||||
|
||||
liverpool->SubmitDone();
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
#include "core/libraries/kernel/event_queues.h"
|
||||
|
||||
namespace Core::Loader {
|
||||
class SymbolsResolver;
|
||||
|
@ -11,7 +12,9 @@ class SymbolsResolver;
|
|||
|
||||
namespace Libraries::GnmDriver {
|
||||
|
||||
int PS4_SYSV_ABI sceGnmAddEqEvent();
|
||||
using namespace Kernel;
|
||||
|
||||
s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata);
|
||||
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed();
|
||||
int PS4_SYSV_ABI sceGnmBeginWorkload();
|
||||
s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask,
|
||||
|
@ -28,7 +31,7 @@ int PS4_SYSV_ABI sceGnmDebuggerSetAddressWatch();
|
|||
int PS4_SYSV_ABI sceGnmDebuggerWriteGds();
|
||||
int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister();
|
||||
int PS4_SYSV_ABI sceGnmDebugHardwareStatus();
|
||||
int PS4_SYSV_ABI sceGnmDeleteEqEvent();
|
||||
s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id);
|
||||
int PS4_SYSV_ABI sceGnmDestroyWorkloadStream();
|
||||
int PS4_SYSV_ABI sceGnmDingDong();
|
||||
int PS4_SYSV_ABI sceGnmDingDongForWorkload();
|
||||
|
@ -104,7 +107,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke
|
|||
int PS4_SYSV_ABI sceGnmInsertSetColorMarker();
|
||||
int PS4_SYSV_ABI sceGnmInsertSetMarker();
|
||||
int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker();
|
||||
int PS4_SYSV_ABI sceGnmInsertWaitFlipDone();
|
||||
s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx);
|
||||
int PS4_SYSV_ABI sceGnmIsCoredumpValid();
|
||||
int PS4_SYSV_ABI sceGnmIsUserPaEnabled();
|
||||
int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex();
|
||||
|
@ -137,7 +140,7 @@ s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, const u32* cs_regs);
|
|||
s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs,
|
||||
u32 modifier);
|
||||
int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader();
|
||||
int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader();
|
||||
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier);
|
||||
int PS4_SYSV_ABI sceGnmSetEsShader();
|
||||
int PS4_SYSV_ABI sceGnmSetGsRingSizes();
|
||||
int PS4_SYSV_ABI sceGnmSetGsShader();
|
||||
|
@ -191,9 +194,12 @@ int PS4_SYSV_ABI sceGnmSqttStopTrace();
|
|||
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer();
|
||||
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2();
|
||||
int PS4_SYSV_ABI sceGnmSqttWaitForEvent();
|
||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers();
|
||||
s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
||||
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes, u32 vo_handle,
|
||||
u32 buf_idx, u32 flip_mode, u32 flip_arg);
|
||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload();
|
||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
||||
s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
||||
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes);
|
||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload();
|
||||
|
|
|
@ -20,6 +20,14 @@ int EqueueInternal::addEvent(const EqueueEvent& event) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// Removes the event whose `ident` equals `id` from the queue.
/// Asserts that such an event exists. Always returns 0.
int EqueueInternal::removeEvent(u64 id) {
    // `m_events` is shared with waiters (waitForEvents locks `m_mutex`), so mutate it under
    // the same mutex.
    std::unique_lock lock{m_mutex};

    const auto it =
        std::ranges::find_if(m_events, [id](const auto& ev) { return ev.event.ident == id; });
    ASSERT(it != m_events.cend());
    m_events.erase(it);
    return 0;
}
|
||||
|
||||
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
|
||||
std::unique_lock lock{m_mutex};
|
||||
int ret = 0;
|
||||
|
|
|
@ -42,11 +42,22 @@ using ResetFunc = void (*)(EqueueEvent* event);
|
|||
using DeleteFunc = void (*)(EqueueInternal* eq, EqueueEvent* event);
|
||||
|
||||
struct SceKernelEvent {
|
||||
enum Type : u64 {
|
||||
Compute0RelMem = 0x00,
|
||||
Compute1RelMem = 0x01,
|
||||
Compute2RelMem = 0x02,
|
||||
Compute3RelMem = 0x03,
|
||||
Compute4RelMem = 0x04,
|
||||
Compute5RelMem = 0x05,
|
||||
Compute6RelMem = 0x06,
|
||||
GfxEop = 0x40
|
||||
};
|
||||
|
||||
u64 ident = 0; /* identifier for this event */
|
||||
s16 filter = 0; /* filter for event */
|
||||
u16 flags = 0;
|
||||
u32 fflags = 0;
|
||||
s64 data = 0;
|
||||
u64 data = 0;
|
||||
void* udata = nullptr; /* opaque user data identifier */
|
||||
};
|
||||
|
||||
|
@ -80,6 +91,7 @@ public:
|
|||
this->m_name = m_name;
|
||||
}
|
||||
int addEvent(const EqueueEvent& event);
|
||||
int removeEvent(u64 id);
|
||||
int waitForEvents(SceKernelEvent* ev, int num, u32 micros);
|
||||
bool triggerEvent(u64 ident, s16 filter, void* trigger_data);
|
||||
int getTriggeredEvents(SceKernelEvent* ev, int num);
|
||||
|
|
|
@ -11,29 +11,34 @@ namespace Libraries::Kernel {
|
|||
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) {
|
||||
if (eq == nullptr) {
|
||||
LOG_ERROR(Kernel_Event, "Event queue is null!");
|
||||
return SCE_KERNEL_ERROR_EINVAL;
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
if (name == nullptr) {
|
||||
LOG_ERROR(Kernel_Event, "Event queue name is invalid!");
|
||||
return SCE_KERNEL_ERROR_EFAULT;
|
||||
}
|
||||
if (name == NULL) {
|
||||
LOG_ERROR(Kernel_Event, "Event queue name is null!");
|
||||
return SCE_KERNEL_ERROR_EINVAL;
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
// Maximum is 32 including null terminator
|
||||
static constexpr size_t MaxEventQueueNameSize = 32;
|
||||
if (std::strlen(name) > MaxEventQueueNameSize) {
|
||||
LOG_ERROR(Kernel_Event, "Event queue name exceeds 32 bytes!");
|
||||
return SCE_KERNEL_ERROR_ENAMETOOLONG;
|
||||
return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
|
||||
}
|
||||
|
||||
LOG_INFO(Kernel_Event, "name = {}", name);
|
||||
|
||||
*eq = new EqueueInternal;
|
||||
(*eq)->setName(std::string(name));
|
||||
return SCE_OK;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
/// Destroys an event queue previously created with `sceKernelCreateEqueue()`.
///
/// @param eq Queue to delete.
/// @return ORBIS_KERNEL_ERROR_EBADF when `eq` is null, ORBIS_OK otherwise.
int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq) {
    if (eq == nullptr) {
        // Use the ORBIS_* error code family like the rest of the event queue API.
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    delete eq;
    return ORBIS_OK;
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
||||
|
|
|
@ -11,6 +11,7 @@ using SceKernelUseconds = u32;
|
|||
using SceKernelEqueue = EqueueInternal*;
|
||||
|
||||
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name);
|
||||
int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq);
|
||||
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
||||
SceKernelUseconds* timo);
|
||||
|
||||
|
|
|
@ -169,6 +169,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
|
||||
// equeue
|
||||
LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);
|
||||
LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue);
|
||||
LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue);
|
||||
// misc
|
||||
LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode);
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/kernel/time_management.h"
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "core/platform.h"
|
||||
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
|
||||
|
@ -196,16 +197,22 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
|
|||
reinterpret_cast<void*>(req.flip_arg));
|
||||
}
|
||||
}
|
||||
|
||||
// Reset flip label
|
||||
req.port->buffer_labels[req.index] = 0;
|
||||
LOG_INFO(Lib_VideoOut, "Flip done [buf = {}]", req.index);
|
||||
}
|
||||
|
||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
|
||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
bool is_eop /*= false*/) {
|
||||
const auto& buffer = port->buffer_slots[index];
|
||||
const auto& group = port->groups[buffer.group_index];
|
||||
auto* frame = renderer->PrepareFrame(group, buffer.address_left);
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
if (requests.size() >= 2) {
|
||||
if (requests.size() >= port->NumRegisteredBuffers()) {
|
||||
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -215,6 +222,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
|
|||
.index = index,
|
||||
.flip_arg = flip_arg,
|
||||
.submit_tsc = Libraries::Kernel::sceKernelReadTsc(),
|
||||
.eop = is_eop,
|
||||
});
|
||||
|
||||
port->flip_status.flipPendingNum = static_cast<int>(requests.size());
|
||||
|
|
|
@ -19,6 +19,8 @@ struct VideoOutPort {
|
|||
bool is_open = false;
|
||||
SceVideoOutResolutionStatus resolution;
|
||||
std::array<VideoOutBuffer, MaxDisplayBuffers> buffer_slots;
|
||||
std::array<uintptr_t, MaxDisplayBuffers> buffer_labels; // should be contiguous in memory
|
||||
static_assert(sizeof(buffer_labels[0]) == 8u);
|
||||
std::array<BufferAttributeGroup, MaxDisplayBufferGroups> groups;
|
||||
FlipStatus flip_status;
|
||||
SceVideoOutVblankStatus vblank_status;
|
||||
|
@ -32,6 +34,11 @@ struct VideoOutPort {
|
|||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
[[nodiscard]] int NumRegisteredBuffers() const {
|
||||
return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(),
|
||||
[](auto& buffer) { return buffer.group_index != -1; });
|
||||
}
|
||||
};
|
||||
|
||||
struct ServiceThreadParams {
|
||||
|
@ -57,7 +64,7 @@ public:
|
|||
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
|
||||
|
||||
void Flip(std::chrono::microseconds timeout);
|
||||
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg);
|
||||
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
|
||||
|
||||
void Vblank();
|
||||
|
||||
|
@ -68,6 +75,7 @@ private:
|
|||
s32 index;
|
||||
s64 flip_arg;
|
||||
u64 submit_tsc;
|
||||
bool eop;
|
||||
};
|
||||
|
||||
std::mutex mutex;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "core/libraries/videoout/driver.h"
|
||||
#include "core/libraries/videoout/video_out.h"
|
||||
#include "core/loader/symbols_resolver.h"
|
||||
#include "core/platform.h"
|
||||
|
||||
namespace Libraries::VideoOut {
|
||||
|
||||
|
@ -210,6 +211,27 @@ void Vblank() {
|
|||
return driver->Vblank();
|
||||
}
|
||||
|
||||
/// Stores the address of the port's flip label array in `*label_addr`.
/// Asserts that `handle` refers to an existing port.
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) {
    const auto* port = driver->GetPort(handle);
    ASSERT(port);

    // Labels are kept in one contiguous array; callers index into it by buffer number.
    const auto* first_label = port->buffer_labels.data();
    *label_addr = reinterpret_cast<uintptr_t>(first_label);
}
|
||||
|
||||
/// Schedules a flip to buffer `buf_id` that is submitted when the next IRQ is signaled.
///
/// @param handle Video-out port handle.
/// @param buf_id Display buffer index to flip to.
/// @param mode   Not used by this implementation.
/// @param arg    Flip argument passed to the flip submission.
/// @param unk    Not used by this implementation.
/// @return 0x8029000b when the port cannot be found, ORBIS_OK otherwise.
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) {
    auto* port = driver->GetPort(handle);
    if (port == nullptr) {
        return 0x8029000b;
    }

    // One-shot handler: the flip itself is queued from the interrupt callback and is
    // marked as an EOP flip.
    const auto flip_on_eop = [port, buf_id, arg](Platform::InterruptId irq) {
        ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured");
        const auto result = driver->SubmitFlip(port, buf_id, arg, true);
        ASSERT_MSG(result, "EOP flip submission failed");
    };
    Platform::IrqC::Instance()->RegisterOnce(flip_on_eop);

    return ORBIS_OK;
}
|
||||
|
||||
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
||||
driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight());
|
||||
|
||||
|
|
|
@ -102,6 +102,10 @@ s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle);
|
|||
void Flip(std::chrono::microseconds micros);
|
||||
void Vblank();
|
||||
|
||||
// Internal system functions
|
||||
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr);
|
||||
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk);
|
||||
|
||||
void RegisterLib(Core::Loader::SymbolsResolver* sym);
|
||||
|
||||
} // namespace Libraries::VideoOut
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/singleton.h"
|
||||
#include "common/types.h"
|
||||
#include "magic_enum.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
|
||||
namespace Platform {
|
||||
|
||||
/// Interrupt sources that can be signaled through the IRQ controller.
/// The numeric values are the same as the ones listed in `SceKernelEvent::Type`.
enum class InterruptId : u32 {
    // Compute queue "release mem" interrupts
    Compute0RelMem = 0x00u,
    Compute1RelMem = 0x01u,
    Compute2RelMem = 0x02u,
    Compute3RelMem = 0x03u,
    Compute4RelMem = 0x04u,
    Compute5RelMem = 0x05u,
    Compute6RelMem = 0x06u,
    // Graphics end-of-pipe interrupt
    GfxEop = 0x40u,
};
|
||||
|
||||
using IrqHandler = std::function<void(InterruptId)>;
|
||||
|
||||
struct IrqController {
|
||||
void RegisterOnce(IrqHandler handler) {
|
||||
std::unique_lock lock{m_lock};
|
||||
one_time_subscribers.emplace(handler);
|
||||
}
|
||||
|
||||
void Register(IrqHandler handler) {
|
||||
ASSERT_MSG(!persistent_handler.has_value(),
|
||||
"Too many persistent handlers"); // Add a slot map if so
|
||||
|
||||
std::unique_lock lock{m_lock};
|
||||
persistent_handler.emplace(handler);
|
||||
}
|
||||
|
||||
void Unregister() {
|
||||
std::unique_lock lock{m_lock};
|
||||
persistent_handler.reset();
|
||||
}
|
||||
|
||||
void Signal(InterruptId irq) {
|
||||
std::unique_lock lock{m_lock};
|
||||
|
||||
LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq));
|
||||
|
||||
if (persistent_handler) {
|
||||
persistent_handler.value()(irq);
|
||||
}
|
||||
|
||||
while (!one_time_subscribers.empty()) {
|
||||
const auto& h = one_time_subscribers.front();
|
||||
h(irq);
|
||||
|
||||
one_time_subscribers.pop();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::optional<IrqHandler> persistent_handler{};
|
||||
std::queue<IrqHandler> one_time_subscribers{};
|
||||
std::mutex m_lock{};
|
||||
};
|
||||
|
||||
using IrqC = Common::Singleton<IrqController>;
|
||||
|
||||
} // namespace Platform
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/thread.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pm4_cmds.h"
|
||||
|
||||
|
@ -11,6 +12,8 @@ namespace AmdGpu {
|
|||
Liverpool::Liverpool() = default;
|
||||
|
||||
void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
||||
Common::SetCurrentThreadName("CommandProcessor_Gfx");
|
||||
|
||||
auto* header = reinterpret_cast<PM4Header*>(cmdbuf);
|
||||
u32 processed_cmd_size = 0;
|
||||
|
||||
|
@ -25,30 +28,30 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
|||
case PM4ItOpcode::Nop:
|
||||
break;
|
||||
case PM4ItOpcode::SetContextReg: {
|
||||
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[ContextRegWordOffset + set_data->reg_offset],
|
||||
header + 2, (count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetShReg: {
|
||||
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetUconfigReg: {
|
||||
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset],
|
||||
header + 2, (count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexType: {
|
||||
auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
|
||||
const auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
|
||||
regs.index_buffer_type.raw = index_type->raw;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndex2: {
|
||||
auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
|
||||
const auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
|
||||
regs.max_index_size = draw_index->max_size;
|
||||
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
|
||||
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
|
||||
|
@ -58,22 +61,52 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
|||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndexAuto: {
|
||||
auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
|
||||
const auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
|
||||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
// rasterizer->DrawIndex();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchDirect: {
|
||||
// const auto* dispatch_direct = reinterpret_cast<PM4CmdDispatchDirect*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEos: {
|
||||
const auto* event_eos = reinterpret_cast<PM4CmdEventWriteEos*>(header);
|
||||
event_eos->SignalFence();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEop: {
|
||||
auto* event_write = reinterpret_cast<PM4CmdEventWriteEop*>(header);
|
||||
const InterruptSelect irq_sel = event_write->int_sel;
|
||||
const DataSelect data_sel = event_write->data_sel;
|
||||
ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64);
|
||||
*event_write->Address() = event_write->DataQWord();
|
||||
const auto* event_eop = reinterpret_cast<PM4CmdEventWriteEop*>(header);
|
||||
event_eop->SignalFence();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DmaData: {
|
||||
auto* dma_data = reinterpret_cast<PM4DmaData*>(header);
|
||||
const auto* dma_data = reinterpret_cast<PM4DmaData*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WriteData: {
|
||||
const auto* write_data = reinterpret_cast<PM4CmdWriteData*>(header);
|
||||
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||
if (!write_data->wr_one_addr.Value()) {
|
||||
std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::AcquireMem: {
|
||||
// const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
while (!wait_reg_mem->Test()) {
|
||||
using namespace std::chrono_literals;
|
||||
std::this_thread::sleep_for(1ms);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
|
@ -3,10 +3,15 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
|
||||
#include <array>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
|
||||
|
@ -610,7 +615,20 @@ struct Liverpool {
|
|||
public:
|
||||
Liverpool();
|
||||
|
||||
/// Starts asynchronous processing of a command buffer.
///
/// @param cmdbuf        Pointer to the first dword of the command list.
/// @param size_in_bytes Size of the command list in bytes.
///
/// Only one submission may be in flight: the previous one must have been retired with
/// SubmitDone() before calling this again.
void Submit(u32* cmdbuf, u32 size_in_bytes) {
    ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending");
    // Request std::launch::async explicitly. With the default policy the implementation may
    // pick deferred execution, in which case the command list would only start running
    // inside SubmitDone()'s `get()` instead of concurrently on its own thread.
    cp = std::async(std::launch::async, &Liverpool::ProcessCmdList, this, cmdbuf,
                    size_in_bytes);
}
|
||||
/// Waits for the submission started by Submit() to finish and releases the future so that
/// the next Submit() is accepted. Does nothing when no submission is pending.
void SubmitDone() {
    // This is wrong as `submitDone()` should never be blocking. The behavior will be
    // reworked with multiple queues introduction
    //
    // Calling get() on a future without shared state is undefined behaviour, so skip it
    // when nothing was submitted since the last call.
    if (cp.valid()) {
        cp.get();
    }
}
|
||||
|
||||
private:
|
||||
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
|
||||
|
||||
std::future<void> cp{};
|
||||
};
|
||||
|
||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
|
||||
#include <cstring>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/rdtsc.h"
|
||||
#include "common/types.h"
|
||||
#include "core/platform.h"
|
||||
#include "video_core/amdgpu/pm4_opcodes.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
@ -201,13 +203,18 @@ struct PM4CmdNop {
|
|||
PM4Type3Header header;
|
||||
u32 data_block[0];
|
||||
|
||||
enum class PayloadType : u32 {
|
||||
DebugMarkerPush = 0x68750001, ///< Begin of GPU event scope
|
||||
DebugMarkerPop = 0x68750002, ///< End of GPU event scope
|
||||
SetVsharpInUdata = 0x68750004, ///< Indicates that V# will be set in the next packet
|
||||
SetTsharpInUdata = 0x68750005, ///< Indicates that T# will be set in the next packet
|
||||
SetSsharpInUdata = 0x68750006, ///< Indicates that S# will be set in the next packet
|
||||
DebugColorMarkerPush = 0x6875000e, ///< Begin of GPU event scope with color
|
||||
/// Marker values for NOP packets; presumably stored in the packet's `data_block` — TODO confirm.
/// Declared as an unscoped enum with a `u32` underlying type, so the enumerators convert to
/// `u32` implicitly.
enum PayloadType : u32 {
|
||||
DebugMarkerPush = 0x68750001u, ///< Begin of GPU event scope
|
||||
DebugMarkerPop = 0x68750002u, ///< End of GPU event scope
|
||||
SetVsharpInUdata = 0x68750004u, ///< Indicates that V# will be set in the next packet
|
||||
SetTsharpInUdata = 0x68750005u, ///< Indicates that T# will be set in the next packet
|
||||
SetSsharpInUdata = 0x68750006u, ///< Indicates that S# will be set in the next packet
|
||||
DebugColorMarkerPush = 0x6875000eu, ///< Begin of GPU event scope with color
|
||||
PatchedFlip = 0x68750776u, ///< Patched flip marker
|
||||
PrepareFlip = 0x68750777u, ///< Flip marker
|
||||
PrepareFlipLabel = 0x68750778u, ///< Flip marker with label address
|
||||
PrepareFlipInterrupt = 0x68750780u, ///< Flip marker with interrupt
|
||||
PrepareFlipInterruptLabel = 0x68750781u, ///< Flip marker with interrupt and label
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -277,13 +284,52 @@ struct PM4CmdEventWriteEop {
|
|||
u32 data_lo; ///< Value that will be written to memory when event occurs
|
||||
u32 data_hi; ///< Value that will be written to memory when event occurs
|
||||
|
||||
u64* Address() const {
|
||||
return reinterpret_cast<u64*>(address_lo | u64(address_hi) << 32);
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>(address_lo | u64(address_hi) << 32);
|
||||
}
|
||||
|
||||
/// Returns the 32-bit payload, i.e. the low data dword only.
u32 DataDWord() const {
    const u32 low_dword = data_lo;
    return low_dword;
}
|
||||
|
||||
/// Returns the 64-bit payload built from `data_hi` (upper half) and `data_lo` (lower half).
u64 DataQWord() const {
    const u64 upper_half = u64(data_hi) << 32;
    return upper_half | data_lo;
}
|
||||
|
||||
void SignalFence() const {
|
||||
switch (data_sel.Value()) {
|
||||
case DataSelect::Data32Low: {
|
||||
*Address<u32>() = DataDWord();
|
||||
break;
|
||||
}
|
||||
case DataSelect::Data64: {
|
||||
*Address<u64>() = DataQWord();
|
||||
break;
|
||||
}
|
||||
case DataSelect::PerfCounter: {
|
||||
*Address<u64>() = Common::FencedRDTSC();
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
switch (int_sel.Value()) {
|
||||
case InterruptSelect::None: {
|
||||
// No interrupt
|
||||
break;
|
||||
}
|
||||
case InterruptSelect::IrqWhenWriteConfirm: {
|
||||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4DmaData {
|
||||
|
@ -311,11 +357,24 @@ struct PM4DmaData {
|
|||
};
|
||||
|
||||
struct PM4CmdWaitRegMem {
|
||||
enum class Engine : u32 { Me = 0u, Pfp = 1u };
|
||||
enum class MemSpace : u32 { Register = 0u, Memory = 1u };
|
||||
enum class Function : u32 {
|
||||
Always = 0u,
|
||||
LessThan = 1u,
|
||||
LessThanEqual = 2u,
|
||||
Equal = 3u,
|
||||
NotEqual = 4u,
|
||||
GreaterThanEqual = 5u,
|
||||
GreaterThan = 6u,
|
||||
Reserved = 7u
|
||||
};
|
||||
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
BitField<0, 3, u32> function;
|
||||
BitField<4, 1, u32> mem_space;
|
||||
BitField<8, 1, u32> engine;
|
||||
BitField<0, 3, Function> function;
|
||||
BitField<4, 1, MemSpace> mem_space;
|
||||
BitField<8, 1, Engine> engine;
|
||||
u32 raw;
|
||||
};
|
||||
u32 poll_addr_lo;
|
||||
|
@ -323,6 +382,116 @@ struct PM4CmdWaitRegMem {
|
|||
u32 ref;
|
||||
u32 mask;
|
||||
u32 poll_interval;
|
||||
|
||||
/// Returns the polled location as a pointer, combining `poll_addr_hi` and `poll_addr_lo`.
u32* Address() const {
    const uintptr_t upper_bits = uintptr_t(poll_addr_hi) << 32;
    return reinterpret_cast<u32*>(upper_bits | poll_addr_lo);
}
|
||||
|
||||
bool Test() const {
|
||||
switch (function.Value()) {
|
||||
case Function::Always: {
|
||||
return true;
|
||||
}
|
||||
case Function::LessThan: {
|
||||
return (*Address() & mask) < ref;
|
||||
}
|
||||
case Function::LessThanEqual: {
|
||||
return (*Address() & mask) <= ref;
|
||||
}
|
||||
case Function::Equal: {
|
||||
return (*Address() & mask) == ref;
|
||||
}
|
||||
case Function::NotEqual: {
|
||||
return (*Address() & mask) != ref;
|
||||
}
|
||||
case Function::GreaterThanEqual: {
|
||||
return (*Address() & mask) >= ref;
|
||||
}
|
||||
case Function::GreaterThan: {
|
||||
return (*Address() & mask) > ref;
|
||||
}
|
||||
case Function::Reserved:
|
||||
[[fallthrough]];
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdWriteData {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
BitField<8, 11, u32> dst_sel;
|
||||
BitField<16, 1, u32> wr_one_addr;
|
||||
BitField<20, 1, u32> wr_confirm;
|
||||
BitField<30, 1, u32> engine_sel;
|
||||
u32 raw;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
u32 dst_addr_lo;
|
||||
u32 dst_addr_hi;
|
||||
};
|
||||
u64 addr64;
|
||||
};
|
||||
u32 data[0];
|
||||
|
||||
template <typename T>
|
||||
void Address(T addr) {
|
||||
addr64 = reinterpret_cast<u64>(addr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>(addr64);
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdEventWriteEos {
|
||||
enum class Command : u32 {
|
||||
GdsStore = 1u,
|
||||
SingalFence = 2u,
|
||||
};
|
||||
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
u32 event_control;
|
||||
BitField<0, 6, u32> event_type; ///< Event type written to VGT_EVENT_INITIATOR
|
||||
BitField<8, 4, u32> event_index; ///< Event index
|
||||
};
|
||||
u32 address_lo;
|
||||
union {
|
||||
u32 cmd_info;
|
||||
BitField<0, 16, u32> address_hi; ///< High bits of address
|
||||
BitField<29, 3, Command> command; ///< Command
|
||||
};
|
||||
union {
|
||||
u32 data; ///< Fence value that will be written to memory when event occurs
|
||||
BitField<0, 16, u32>
|
||||
gds_index; ///< Indexed offset from the start of the segment within the partition
|
||||
BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS
|
||||
};
|
||||
|
||||
u32* Address() const {
|
||||
return reinterpret_cast<u32*>(address_lo | u64(address_hi) << 32);
|
||||
}
|
||||
|
||||
u32 DataDWord() const {
|
||||
return this->data;
|
||||
}
|
||||
|
||||
void SignalFence() const {
|
||||
switch (command.Value()) {
|
||||
case Command::SingalFence: {
|
||||
*Address() = DataDWord();
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
|
|
@ -49,7 +49,7 @@ enum class PM4ItOpcode : u32 {
|
|||
PremableCntl = 0x4A,
|
||||
DmaData = 0x50,
|
||||
ContextRegRmw = 0x51,
|
||||
Unknown58 = 0x58,
|
||||
AcquireMem = 0x58,
|
||||
LoadShReg = 0x5F,
|
||||
LoadConfigReg = 0x60,
|
||||
LoadContextReg = 0x61,
|
||||
|
|
Loading…
Reference in New Issue