From 4206ec3c9449230896aaeeeef210e0429bd380e3 Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 7 May 2024 22:46:54 +0200 Subject: [PATCH 01/15] video_core, kernel: added gfx eop event handling --- src/core/libraries/gnmdriver/gnmdriver.cpp | 33 +++++++++++++++++++--- src/core/libraries/gnmdriver/gnmdriver.h | 7 +++-- src/core/libraries/kernel/event_queue.cpp | 8 ++++++ src/core/libraries/kernel/event_queue.h | 14 ++++++++- src/video_core/amdgpu/liverpool.h | 6 +++- 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index b0706dfd..e5bbaaa0 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -30,8 +30,26 @@ static inline u32* WriteTrailingNop(u32* cmdbuf) { return cmdbuf + data_block_size + 1 /* header */; } -int PS4_SYSV_ABI sceGnmAddEqEvent() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { + LOG_TRACE(Lib_GnmDriver, "called"); + ASSERT_MSG(id == SceKernelEvent::Type::GfxEop); + + if (!eq) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + EqueueEvent kernel_event{}; + kernel_event.event.ident = id; + kernel_event.event.filter = EVFILT_GRAPHICS_CORE; + kernel_event.event.flags = 1; + kernel_event.event.fflags = 0; + kernel_event.event.data = id; + kernel_event.event.udata = udata; + eq->addEvent(kernel_event); + + liverpool->eop_callback = [=]() { + eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); + }; return ORBIS_OK; } @@ -131,8 +149,15 @@ int PS4_SYSV_ABI sceGnmDebugHardwareStatus() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmDeleteEqEvent() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) { + LOG_TRACE(Lib_GnmDriver, "called"); + ASSERT_MSG(id == SceKernelEvent::Type::GfxEop); + + if (!eq) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + 
eq->removeEvent(id); return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 1f25649a..36e33eb3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -4,6 +4,7 @@ #pragma once #include "common/types.h" +#include "core/libraries/kernel/event_queues.h" namespace Core::Loader { class SymbolsResolver; @@ -11,7 +12,9 @@ class SymbolsResolver; namespace Libraries::GnmDriver { -int PS4_SYSV_ABI sceGnmAddEqEvent(); +using namespace Kernel; + +s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata); int PS4_SYSV_ABI sceGnmAreSubmitsAllowed(); int PS4_SYSV_ABI sceGnmBeginWorkload(); s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask, @@ -28,7 +31,7 @@ int PS4_SYSV_ABI sceGnmDebuggerSetAddressWatch(); int PS4_SYSV_ABI sceGnmDebuggerWriteGds(); int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister(); int PS4_SYSV_ABI sceGnmDebugHardwareStatus(); -int PS4_SYSV_ABI sceGnmDeleteEqEvent(); +s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id); int PS4_SYSV_ABI sceGnmDestroyWorkloadStream(); int PS4_SYSV_ABI sceGnmDingDong(); int PS4_SYSV_ABI sceGnmDingDongForWorkload(); diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 8fd70106..8642af96 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -20,6 +20,14 @@ int EqueueInternal::addEvent(const EqueueEvent& event) { return 0; } +int EqueueInternal::removeEvent(u64 id) { + const auto& event_q = std::find_if(m_events.cbegin(), m_events.cend(), + [id](auto& ev) { return ev.event.ident == id; }); + ASSERT(event_q != m_events.cend()); + m_events.erase(event_q); + return 0; +} + int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) { std::unique_lock lock{m_mutex}; int ret = 0; diff --git a/src/core/libraries/kernel/event_queue.h 
b/src/core/libraries/kernel/event_queue.h index 12151a0c..745a0ac0 100644 --- a/src/core/libraries/kernel/event_queue.h +++ b/src/core/libraries/kernel/event_queue.h @@ -42,11 +42,22 @@ using ResetFunc = void (*)(EqueueEvent* event); using DeleteFunc = void (*)(EqueueInternal* eq, EqueueEvent* event); struct SceKernelEvent { + enum Type : u64 { + Compute0RelMem = 0x00, + Compute1RelMem = 0x01, + Compute2RelMem = 0x02, + Compute3RelMem = 0x03, + Compute4RelMem = 0x04, + Compute5RelMem = 0x05, + Compute6RelMem = 0x06, + GfxEop = 0x40 + }; + u64 ident = 0; /* identifier for this event */ s16 filter = 0; /* filter for event */ u16 flags = 0; u32 fflags = 0; - s64 data = 0; + u64 data = 0; void* udata = nullptr; /* opaque user data identifier */ }; @@ -80,6 +91,7 @@ public: this->m_name = m_name; } int addEvent(const EqueueEvent& event); + int removeEvent(u64 id); int waitForEvents(SceKernelEvent* ev, int num, u32 micros); bool triggerEvent(u64 ident, s16 filter, void* trigger_data); int getTriggeredEvents(SceKernelEvent* ev, int num); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 229b5804..44c2a526 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -3,10 +3,12 @@ #pragma once -#include #include "common/bit_field.h" #include "common/types.h" +#include +#include + namespace AmdGpu { #define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32)) @@ -611,6 +613,8 @@ public: Liverpool(); void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes); + + std::function eop_callback{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); From 50bcd1348272b79bc5f3f4f96d992cb0ac1687de Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 8 May 2024 07:35:10 +0200 Subject: [PATCH 02/15] video_out: added display buffer labels --- src/core/libraries/videoout/driver.h | 2 ++ src/core/libraries/videoout/video_out.cpp | 6 ++++++ src/core/libraries/videoout/video_out.h | 2 ++ 
3 files changed, 10 insertions(+) diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index fac12135..0c1ea582 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -19,6 +19,8 @@ struct VideoOutPort { bool is_open = false; SceVideoOutResolutionStatus resolution; std::array buffer_slots; + std::array buffer_labels; // should be contiguous in memory + static_assert(sizeof(buffer_labels[0]) == 8u); std::array groups; FlipStatus flip_status; SceVideoOutVblankStatus vblank_status; diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 363bd538..d6acd86c 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -210,6 +210,12 @@ void Vblank() { return driver->Vblank(); } +void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) { + auto* port = driver->GetPort(handle); + ASSERT(port); + *label_addr = reinterpret_cast(port->buffer_labels.data()); +} + void RegisterLib(Core::Loader::SymbolsResolver* sym) { driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 951eb6a5..9c7227e6 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -102,6 +102,8 @@ s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); void Flip(std::chrono::microseconds micros); void Vblank(); +void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); + void RegisterLib(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::VideoOut From 5f83ad2ecb688f033282324b6a1b2f3ee762210d Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 8 May 2024 07:36:14 +0200 Subject: [PATCH 03/15] gnm_driver: `sceGnmInsertWaitFlipDone` added --- src/core/libraries/gnmdriver/gnmdriver.cpp | 22 ++++++++++++++++++++-- 
src/core/libraries/gnmdriver/gnmdriver.h | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index e5bbaaa0..ded81d6c 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -6,6 +6,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/libs.h" +#include "core/libraries/videoout/video_out.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -639,8 +640,25 @@ int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmInsertWaitFlipDone() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (size != 7) { + return -1; + } + + uintptr_t label_addr{}; + VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr); + + auto* write_reg_mem = reinterpret_cast(cmdbuf); + write_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; + write_reg_mem->function.Assign(3u); + write_reg_mem->mem_space.Assign(1u); + *reinterpret_cast(&write_reg_mem->poll_addr_lo) = + (label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu; + write_reg_mem->ref = 0u; + write_reg_mem->mask = 0xffff'ffffu; + write_reg_mem->poll_interval = 10u; return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 36e33eb3..e33f0b82 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -107,7 +107,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke int PS4_SYSV_ABI sceGnmInsertSetColorMarker(); int PS4_SYSV_ABI sceGnmInsertSetMarker(); int PS4_SYSV_ABI 
sceGnmInsertThreadTraceMarker(); -int PS4_SYSV_ABI sceGnmInsertWaitFlipDone(); +s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx); int PS4_SYSV_ABI sceGnmIsCoredumpValid(); int PS4_SYSV_ABI sceGnmIsUserPaEnabled(); int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex(); From 72ea0be9ffd173593005967ff97547c1a1521bd7 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 8 May 2024 08:12:19 +0200 Subject: [PATCH 04/15] gnm_driver: `sceGnmSetEmbeddedVsShader` added --- src/core/libraries/gnmdriver/gnmdriver.cpp | 71 ++++++++++++++++++---- src/core/libraries/gnmdriver/gnmdriver.h | 9 ++- src/core/libraries/kernel/event_queue.cpp | 2 +- 3 files changed, 65 insertions(+), 17 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index ded81d6c..85b59bcb 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -867,8 +867,48 @@ int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + + // A fullscreen triangle with one uv set + const static u32 shader_code[] = { + 0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007 + 0x36020081u, // v_and_b32 v1, 1, v0 + 0x34020281u, // v_lshlrev_b32 v1, 1, v1 + 0x360000c2u, // v_and_b32 v0, -2, v0 + 0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1 + 0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0 + 0x7e020b01u, // v_cvt_f32_i32 v1, v1 + 0x7e040280u, // v_cvt_f32_i32 v0, v0 + 0x7e0602f2u, // v_mov_b32 v3, 1.0 + 0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done + 0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3 + 0xbf810000u, // s_endpgm + + // OrbShdr header + 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, + 
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u}; + + const auto shader_addr = uintptr_t(&shader_code); // Original address is 0xfe000f10 + const static u32 vs_regs[] = { + u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7}; + + if (shader_id != 0) { + return 0x8eee00ff; + } + + // Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has + // a check for zero in the upper part of shader address. In our case, the address is a + // pointer to a stack memory, so the check will likely fail. To workaround it we will + // repeat set shader functionality here as it is trivial. + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } @@ -1003,6 +1043,8 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() { } s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + if (!cmdbuf || size <= 0x1c) { return -1; } @@ -1030,7 +1072,6 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3 cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT WriteTrailingNop<11>(cmdbuf); - return ORBIS_OK; } @@ -1229,7 +1270,10 @@ int PS4_SYSV_ABI sceGnmSqttWaitForEvent() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers() { +s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes, u32 vo_handle, + u32 buf_idx, u32 
flip_mode, u32 flip_arg) { LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); return ORBIS_OK; } @@ -1239,34 +1283,35 @@ int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes, - void* ccbGpuAddrs[], u32* ccbSizesInBytes) { +s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes) { LOG_INFO(Lib_GnmDriver, "called"); ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!"); - if (!dcbGpuAddrs || !dcbSizesInBytes) { + if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) { LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL"); return 0x80d11000; } for (u32 i = 0; i < count; i++) { - if (dcbSizesInBytes[i] == 0) { + if (dcb_sizes_in_bytes[i] == 0) { LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i); return 0x80d11000; } - if (dcbSizesInBytes[i] > 0x3ffffc) { + if (dcb_sizes_in_bytes[i] > 0x3ffffc) { LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i, - dcbSizesInBytes[i]); + dcb_sizes_in_bytes[i]); return 0x80d11000; } - if (ccbSizesInBytes && ccbSizesInBytes[i] > 0x3ffffc) { + if (ccb_sizes_in_bytes && ccb_sizes_in_bytes[i] > 0x3ffffc) { LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i, - ccbSizesInBytes[i]); + ccb_sizes_in_bytes[i]); return 0x80d11000; } } - liverpool->ProcessCmdList(reinterpret_cast(dcbGpuAddrs[0]), dcbSizesInBytes[0]); + liverpool->ProcessCmdList(reinterpret_cast(dcb_sizes_in_bytes[0]), dcb_sizes_in_bytes[0]); return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index e33f0b82..c1aeef8e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -140,7 +140,7 @@ s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, 
const u32* cs_regs); s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs, u32 modifier); int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(); -int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(); +s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier); int PS4_SYSV_ABI sceGnmSetEsShader(); int PS4_SYSV_ABI sceGnmSetGsRingSizes(); int PS4_SYSV_ABI sceGnmSetGsShader(); @@ -194,9 +194,12 @@ int PS4_SYSV_ABI sceGnmSqttStopTrace(); int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer(); int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2(); int PS4_SYSV_ABI sceGnmSqttWaitForEvent(); -int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(); +s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes, u32 vo_handle, + u32 buf_idx, u32 flip_mode, u32 flip_arg); int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(); -int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], +s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes); int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(); diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 8642af96..d1004f61 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -22,7 +22,7 @@ int EqueueInternal::addEvent(const EqueueEvent& event) { int EqueueInternal::removeEvent(u64 id) { const auto& event_q = std::find_if(m_events.cbegin(), m_events.cend(), - [id](auto& ev) { return ev.event.ident == id; }); + [id](auto& ev) { return ev.event.ident == id; }); ASSERT(event_q != m_events.cend()); m_events.erase(event_q); return 0; From 4746f514ffdee75f5ada963f41f7a11e7b5b8591 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 8 May 2024 22:53:55 +0200 Subject: [PATCH 
05/15] gnm_driver: various fixes --- src/core/libraries/gnmdriver/gnmdriver.cpp | 32 +++++++++++----------- src/video_core/amdgpu/pm4_cmds.h | 19 ++++++++----- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 85b59bcb..ddd96cea 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -238,7 +238,7 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) { cmdbuf = WriteHeader(cmdbuf, 0xef); cmdbuf = WriteBody(cmdbuf, 0xau, 0u); } else { - cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff); } return 0x100; // it is a size, not a retcode } @@ -347,7 +347,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) { if constexpr (g_fair_hw_init) { ASSERT_MSG(0, "Not implemented"); } else { - cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff); } return 0x100; // it is a size, not a retcode } @@ -361,7 +361,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) { if constexpr (g_fair_hw_init) { ASSERT_MSG(0, "Not implemented"); } else { - cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff); } return 0x100; // it is a size, not a retcode } @@ -591,9 +591,9 @@ s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size) { LOG_TRACE(Lib_GnmDriver, "called"); if (cmdbuf && (size == 6)) { - cmdbuf = WritePacket( - cmdbuf, PM4ShaderType::ShaderGraphics, - static_cast(PM4CmdNop::PayloadType::DebugMarkerPop), 0u, 0u, 0u, 0u); + cmdbuf = + WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, + PM4CmdNop::PayloadType::DebugMarkerPop, 0u, 0u, 0u, 0u); return ORBIS_OK; } return -1; @@ -614,7 +614,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke auto* nop = reinterpret_cast(cmdbuf); nop->header = 
PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics}; - nop->data_block[0] = static_cast(PM4CmdNop::PayloadType::DebugMarkerPush); + nop->data_block[0] = PM4CmdNop::PayloadType::DebugMarkerPush; const auto marker_len = len + 1; std::memcpy(&nop->data_block[1], marker, marker_len); std::memset(reinterpret_cast(&nop->data_block[1]) + marker_len, 0, @@ -650,15 +650,15 @@ s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, uintptr_t label_addr{}; VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr); - auto* write_reg_mem = reinterpret_cast(cmdbuf); - write_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; - write_reg_mem->function.Assign(3u); - write_reg_mem->mem_space.Assign(1u); - *reinterpret_cast(&write_reg_mem->poll_addr_lo) = + auto* wait_reg_mem = reinterpret_cast(cmdbuf); + wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; + wait_reg_mem->function.Assign(3u); + wait_reg_mem->mem_space.Assign(1u); + *reinterpret_cast(&wait_reg_mem->poll_addr_lo) = (label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu; - write_reg_mem->ref = 0u; - write_reg_mem->mask = 0xffff'ffffu; - write_reg_mem->poll_interval = 10u; + wait_reg_mem->ref = 0u; + wait_reg_mem->mask = 0xffff'ffffu; + wait_reg_mem->poll_interval = 10u; return ORBIS_OK; } @@ -1311,7 +1311,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], } } - liverpool->ProcessCmdList(reinterpret_cast(dcb_sizes_in_bytes[0]), dcb_sizes_in_bytes[0]); + liverpool->ProcessCmdList(reinterpret_cast(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]); return ORBIS_OK; } diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index eb258dc3..bc60ab8d 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -201,13 +201,18 @@ struct PM4CmdNop { PM4Type3Header header; u32 data_block[0]; - enum class PayloadType : u32 { - DebugMarkerPush = 0x68750001, ///< Begin of 
GPU event scope - DebugMarkerPop = 0x68750002, ///< End of GPU event scope - SetVsharpInUdata = 0x68750004, ///< Indicates that V# will be set in the next packet - SetTsharpInUdata = 0x68750005, ///< Indicates that T# will be set in the next packet - SetSsharpInUdata = 0x68750006, ///< Indicates that S# will be set in the next packet - DebugColorMarkerPush = 0x6875000e, ///< Begin of GPU event scope with color + enum PayloadType : u32 { + DebugMarkerPush = 0x68750001u, ///< Begin of GPU event scope + DebugMarkerPop = 0x68750002u, ///< End of GPU event scope + SetVsharpInUdata = 0x68750004u, ///< Indicates that V# will be set in the next packet + SetTsharpInUdata = 0x68750005u, ///< Indicates that T# will be set in the next packet + SetSsharpInUdata = 0x68750006u, ///< Indicates that S# will be set in the next packet + DebugColorMarkerPush = 0x6875000eu, ///< Begin of GPU event scope with color + PatchedFlip = 0x68750776u, ///< Patched flip marker + PrepareFlip = 0x68750777u, ///< Flip marker + PrepareFlipLabel = 0x68750778u, ///< Flip marker with label address + PrepareFlipInterrupt = 0x68750780u, ///< Flip marker with interrupt + PrepareFlipInterruptLabel = 0x68750781u, ///< Flip marker with interrupt and label }; }; From c7cfe2622d1b3ce35cf5d31ced7a4f92417cab91 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 8 May 2024 22:54:34 +0200 Subject: [PATCH 06/15] video_out: `sceVideoOutSubmitEopFlip` stub added --- src/core/libraries/videoout/video_out.cpp | 11 +++++++++++ src/core/libraries/videoout/video_out.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index d6acd86c..555ff083 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -216,6 +216,17 @@ void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) { *label_addr = reinterpret_cast(port->buffer_labels.data()); } +s32 sceVideoOutSubmitEopFlip(s32 
handle, u32 buf_id, u32 mode, u32 arg, void* unk) { + auto* port = driver->GetPort(handle); + if (!port) { + return 0x8029000b; + } + + // TODO + + return ORBIS_OK; +} + void RegisterLib(Core::Loader::SymbolsResolver* sym) { driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 9c7227e6..fc62bc9b 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -102,7 +102,9 @@ s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); void Flip(std::chrono::microseconds micros); void Vblank(); +// Internal system functions void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); +s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void* unk); void RegisterLib(Core::Loader::SymbolsResolver* sym); From 498c5eb0cdbafc5ed2ae6ea558e704ee42392531 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 8 May 2024 22:55:39 +0200 Subject: [PATCH 07/15] gnm_driver: `sceGnmSubmitAndFlipCommandBuffers` implementation --- src/core/libraries/gnmdriver/gnmdriver.cpp | 98 +++++++++++++++++++++- src/video_core/amdgpu/pm4_cmds.h | 14 ++++ 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index ddd96cea..7e9a041b 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1270,12 +1270,106 @@ int PS4_SYSV_ABI sceGnmSqttWaitForEvent() { return ORBIS_OK; } +static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf_idx, u32 flip_mode, + u32 flip_arg, void* unk) { + // check for `prepareFlip` packet + cmdbuf += size - 64; + ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet"); + + std::array backup{}; + std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type)); + + ASSERT_MSG(((backup[2] & 
3) == 0u) || (backup[1] != PM4CmdNop::PayloadType::PrepareFlip), + "Invalid flip packet"); + ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index"); + + const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode, + flip_arg, nullptr /*unk*/); + if (flip_result != 0) { + if (flip_result == 0x80290012) { + LOG_ERROR(Lib_GnmDriver, "Flip queue is full"); + return 0x80d11081; + } else { + LOG_ERROR(Lib_GnmDriver, "Flip request failed"); + return flip_result; + } + } + + uintptr_t label_addr{}; + VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr); + + // Write event to lock the VO surface + auto* write_lock = reinterpret_cast(cmdbuf); + write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; + write_lock->dst_sel.Assign(5u); + *reinterpret_cast(&write_lock->dst_addr_lo) = + (label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu; + write_lock->data[0] = 1; + + auto* nop = reinterpret_cast(cmdbuf + 5); + + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlip) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x39}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + } else { + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipLabel) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x34}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + + // Write event to update label + auto* write_label = reinterpret_cast(cmdbuf + 0x3b); + write_label->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; + write_label->dst_sel.Assign(5u); + write_label->dst_addr_lo = backup[2] & 0xffff'fffcu; + write_label->dst_addr_hi = backup[3]; + write_label->data[0] = backup[4]; + } + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterruptLabel) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + + auto* write_eop = reinterpret_cast(cmdbuf + 0x3a); + write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4}; + 
write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000; + write_eop->address_lo = backup[2] & 0xffff'fffcu; + write_eop->data_control = (backup[3] & 0xffffu) | 0x2200'0000u; + write_eop->data_lo = backup[4]; + write_eop->data_hi = 0u; + } + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterrupt) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + + auto* write_eop = reinterpret_cast(cmdbuf + 0x3a); + write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4}; + write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000; + write_eop->address_lo = 0u; + write_eop->data_control = 0x100'0000u; + write_eop->data_lo = 0u; + write_eop->data_hi = 0u; + } + } + + return ORBIS_OK; +} + s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, u32 flip_arg) { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; + LOG_INFO(Lib_GnmDriver, "called"); + + auto* cmdbuf = reinterpret_cast(dcb_gpu_addrs[count - 1]); + const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4; + + const s32 patch_result = + PatchFlipRequest(cmdbuf, size_dw, vo_handle, buf_idx, flip_mode, flip_arg, nullptr /*unk*/); + if (patch_result != ORBIS_OK) { + return patch_result; + } + + return sceGnmSubmitCommandBuffers(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs, + ccb_sizes_in_bytes); } int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index bc60ab8d..bddd277a 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -330,4 +330,18 @@ struct PM4CmdWaitRegMem { u32 poll_interval; }; +struct PM4CmdWriteData { + PM4Type3Header header; + union { + BitField<8, 11, u32> dst_sel; + 
BitField<16, 1, u32> wr_one_addr; + BitField<20, 1, u32> wr_confirm; + BitField<30, 1, u32> engine_sel; + u32 raw; + }; + u32 dst_addr_lo; + u32 dst_addr_hi; + u32 data[0]; +}; + } // namespace AmdGpu From bfb18135fb902eba27581f564519e8d1b058bb1e Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 8 May 2024 23:27:56 +0200 Subject: [PATCH 08/15] amdgpu: EOP irq and dummy PM4 handlers --- src/core/libraries/gnmdriver/gnmdriver.cpp | 2 +- src/video_core/amdgpu/liverpool.cpp | 75 ++++++++++++++++++---- src/video_core/amdgpu/pm4_cmds.h | 4 ++ src/video_core/amdgpu/pm4_opcodes.h | 2 +- 4 files changed, 69 insertions(+), 14 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 7e9a041b..dbe454b2 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -231,7 +231,7 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) { 0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1 cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS - cmdbuf = WriteHeader( + cmdbuf = WriteHeader( cmdbuf, 6); // for some reason the packet indicates larger size cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 679cab90..c7db16ce 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -25,30 +25,30 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { case PM4ItOpcode::Nop: break; case PM4ItOpcode::SetContextReg: { - auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetShReg: { - auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); 
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetUconfigReg: { - auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::IndexType: { - auto* index_type = reinterpret_cast(header); + const auto* index_type = reinterpret_cast(header); regs.index_buffer_type.raw = index_type->raw; break; } case PM4ItOpcode::DrawIndex2: { - auto* draw_index = reinterpret_cast(header); + const auto* draw_index = reinterpret_cast(header); regs.max_index_size = draw_index->max_size; regs.index_base_address.base_addr_lo = draw_index->index_base_lo; regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); @@ -58,22 +58,73 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; } case PM4ItOpcode::DrawIndexAuto: { - auto* draw_index = reinterpret_cast(header); + const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; // rasterizer->DrawIndex(); break; } + case PM4ItOpcode::DispatchDirect: { + // const auto* dispatch_direct = reinterpret_cast(header); + break; + } + case PM4ItOpcode::EventWriteEos: { + // const auto* event_eos = reinterpret_cast(header); + break; + } case PM4ItOpcode::EventWriteEop: { - auto* event_write = reinterpret_cast(header); - const InterruptSelect irq_sel = event_write->int_sel; - const DataSelect data_sel = event_write->data_sel; - ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64); - *event_write->Address() = event_write->DataQWord(); + const auto* event_eop = reinterpret_cast(header); + const InterruptSelect irq_sel = event_eop->int_sel; + const DataSelect data_sel = event_eop->data_sel; + + // Write back data if required + switch (data_sel) { + case 
DataSelect::Data32Low: { + *reinterpret_cast(event_eop->Address()) = event_eop->DataDWord(); + break; + } + case DataSelect::Data64: { + *event_eop->Address() = event_eop->DataQWord(); + break; + } + default: { + UNREACHABLE(); + } + } + + switch (irq_sel) { + case InterruptSelect::None: { + // No interrupt + break; + } + case InterruptSelect::IrqWhenWriteConfirm: { + if (eop_callback) { + eop_callback(); + } else { + UNREACHABLE_MSG("EOP callback is not registered"); + } + break; + } + default: { + UNREACHABLE(); + } + } break; } case PM4ItOpcode::DmaData: { - auto* dma_data = reinterpret_cast(header); + const auto* dma_data = reinterpret_cast(header); + break; + } + case PM4ItOpcode::WriteData: { + const auto* write_data = reinterpret_cast(header); + break; + } + case PM4ItOpcode::AcquireMem: { + // const auto* acquire_mem = reinterpret_cast(header); + break; + } + case PM4ItOpcode::WaitRegMem: { + const auto* wait_reg_mem = reinterpret_cast(header); break; } default: diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index bddd277a..6ce06750 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -286,6 +286,10 @@ struct PM4CmdEventWriteEop { return reinterpret_cast(address_lo | u64(address_hi) << 32); } + u32 DataDWord() const { + return data_lo; + } + u64 DataQWord() const { return data_lo | u64(data_hi) << 32; } diff --git a/src/video_core/amdgpu/pm4_opcodes.h b/src/video_core/amdgpu/pm4_opcodes.h index 2772716a..fb3fc8c5 100644 --- a/src/video_core/amdgpu/pm4_opcodes.h +++ b/src/video_core/amdgpu/pm4_opcodes.h @@ -49,7 +49,7 @@ enum class PM4ItOpcode : u32 { PremableCntl = 0x4A, DmaData = 0x50, ContextRegRmw = 0x51, - Unknown58 = 0x58, + AcquireMem = 0x58, LoadShReg = 0x5F, LoadConfigReg = 0x60, LoadContextReg = 0x61, From 8e0c67f12ea29215f4842b7d29bda99872809d72 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 9 May 2024 22:59:35 +0200 Subject: [PATCH 09/15] amdgpu: `wait_reg_mem` and 
`write_data` implementation Command list parsing is temporary moved to async task --- src/core/libraries/gnmdriver/gnmdriver.cpp | 24 +++++----- src/core/libraries/videoout/driver.cpp | 3 ++ src/video_core/amdgpu/liverpool.cpp | 15 +++++++ src/video_core/amdgpu/liverpool.h | 20 +++++++++ src/video_core/amdgpu/pm4_cmds.h | 52 ++++++++++++++++++++++ 5 files changed, 103 insertions(+), 11 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index dbe454b2..921babfa 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -27,7 +27,7 @@ template static inline u32* WriteTrailingNop(u32* cmdbuf) { auto* nop = reinterpret_cast(cmdbuf); nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1}; - nop->data_block[0] = 0; // only one out of `data_block_size` is initialized + nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized return cmdbuf + data_block_size + 1 /* header */; } @@ -48,9 +48,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { kernel_event.event.udata = udata; eq->addEvent(kernel_event); - liverpool->eop_callback = [=]() { - eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); - }; + liverpool->SetEopCallback( + [=]() { eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); }); return ORBIS_OK; } @@ -82,7 +81,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add wait_reg_mem->poll_addr_hi = u32(addr >> 32u); wait_reg_mem->ref = ref; wait_reg_mem->mask = mask; - wait_reg_mem->poll_interval = 10; + wait_reg_mem->poll_interval = 10u; WriteTrailingNop<2>(cmdbuf + 7); return ORBIS_OK; @@ -652,10 +651,10 @@ s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, auto* wait_reg_mem = reinterpret_cast(cmdbuf); wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; - 
wait_reg_mem->function.Assign(3u); - wait_reg_mem->mem_space.Assign(1u); + wait_reg_mem->function.Assign(PM4CmdWaitRegMem::Function::Equal); + wait_reg_mem->mem_space.Assign(PM4CmdWaitRegMem::MemSpace::Memory); *reinterpret_cast(&wait_reg_mem->poll_addr_lo) = - (label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu; + (label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull; wait_reg_mem->ref = 0u; wait_reg_mem->mask = 0xffff'ffffu; wait_reg_mem->poll_interval = 10u; @@ -1303,7 +1302,7 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; write_lock->dst_sel.Assign(5u); *reinterpret_cast(&write_lock->dst_addr_lo) = - (label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu; + (label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull; write_lock->data[0] = 1; auto* nop = reinterpret_cast(cmdbuf + 5); @@ -1405,7 +1404,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], } } - liverpool->ProcessCmdList(reinterpret_cast(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]); + liverpool->Submit(reinterpret_cast(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]); return ORBIS_OK; } @@ -1416,7 +1415,10 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() { } int PS4_SYSV_ABI sceGnmSubmitDone() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); + LOG_INFO(Lib_GnmDriver, "called"); + + liverpool->SubmitDone(); + return ORBIS_OK; } diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index e1a8b0e9..5e093c20 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -196,6 +196,9 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { reinterpret_cast(req.flip_arg)); } } + + // Reset flip label + req.port->buffer_labels[req.index] = 0; } bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { diff --git a/src/video_core/amdgpu/liverpool.cpp 
b/src/video_core/amdgpu/liverpool.cpp index c7db16ce..5fbb1acb 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -117,6 +117,14 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { } case PM4ItOpcode::WriteData: { const auto* write_data = reinterpret_cast(header); + ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); + const u32 data_size = (header->type3.count.Value() - 2) * 4; + if (!write_data->wr_one_addr.Value()) { + std::memcpy(reinterpret_cast(write_data->Address()), write_data->data, + data_size); + } else { + UNREACHABLE(); + } break; } case PM4ItOpcode::AcquireMem: { @@ -125,6 +133,13 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { } case PM4ItOpcode::WaitRegMem: { const auto* wait_reg_mem = reinterpret_cast(header); + ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + ASSERT(wait_reg_mem->function.Value() == PM4CmdWaitRegMem::Function::Equal); + + { + std::unique_lock lock{m_reg_mem}; + cv_reg_mem.wait(lock, [&]() { return wait_reg_mem->Test(); }); + } break; } default: diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 44c2a526..34cac432 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -3,11 +3,14 @@ #pragma once +#include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" #include +#include #include +#include namespace AmdGpu { @@ -612,9 +615,26 @@ struct Liverpool { public: Liverpool(); + void Submit(u32* cmdbuf, u32 size_in_bytes) { + ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending"); + cp = std::async(&Liverpool::ProcessCmdList, this, cmdbuf, size_in_bytes); + } + void SubmitDone() { + // This is wrong as `submitDone()` should never be blocking. 
The behavior will be + // reworked with mutiple queues introduction + cp.get(); + } + void SetEopCallback(auto const& cb) { + eop_callback = cb; + } + +private: void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes); std::function eop_callback{}; + std::future cp{}; + std::condition_variable cv_reg_mem{}; + std::mutex m_reg_mem{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 6ce06750..762897fb 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -320,6 +320,19 @@ struct PM4DmaData { }; struct PM4CmdWaitRegMem { + enum Engine : u32 { Me = 0u, Pfp = 1u }; + enum MemSpace : u32 { Register = 0u, Memory = 1u }; + enum Function : u32 { + Always = 0u, + LessThan = 1u, + LessThanEqual = 2u, + Equal = 3u, + NotEqual = 4u, + GreaterThanEqual = 5u, + GreaterThan = 6u, + Reserved = 7u + }; + PM4Type3Header header; union { BitField<0, 3, u32> function; @@ -332,6 +345,41 @@ struct PM4CmdWaitRegMem { u32 ref; u32 mask; u32 poll_interval; + + u32* Address() const { + return reinterpret_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); + } + + bool Test() const { + switch (function.Value()) { + case Function::Always: { + return true; + } + case Function::LessThan: { + return (*Address() & mask) < ref; + } + case Function::LessThanEqual: { + return (*Address() & mask) <= ref; + } + case Function::Equal: { + return (*Address() & mask) == ref; + } + case Function::NotEqual: { + return (*Address() & mask) != ref; + } + case Function::GreaterThanEqual: { + return (*Address() & mask) >= ref; + } + case Function::GreaterThan: { + return (*Address() & mask) > ref; + } + case Function::Reserved: + [[fallthrough]]; + default: { + UNREACHABLE(); + } + } + } }; struct PM4CmdWriteData { @@ -346,6 +394,10 @@ struct PM4CmdWriteData { u32 dst_addr_lo; u32 dst_addr_hi; u32 data[0]; + + uintptr_t Address() const { + return (uintptr_t(dst_addr_hi) << 32) | 
dst_addr_lo; + } }; } // namespace AmdGpu From 581688c1ac18e698b4d9ebb879e2c3218d4133d9 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 10 May 2024 11:55:04 +0200 Subject: [PATCH 10/15] amdgpu: EOS event packet handling --- src/video_core/amdgpu/liverpool.cpp | 11 ++++++- src/video_core/amdgpu/pm4_cmds.h | 51 +++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 5fbb1acb..2e4566f5 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -69,7 +69,16 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; } case PM4ItOpcode::EventWriteEos: { - // const auto* event_eos = reinterpret_cast(header); + const auto* event_eos = reinterpret_cast(header); + switch (event_eos->command.Value()) { + case PM4CmdEventWriteEos::Command::SingalFence: { + event_eos->SignalFence(); + break; + } + default: { + UNREACHABLE(); + } + } break; } case PM4ItOpcode::EventWriteEop: { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 762897fb..91e67a59 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -320,9 +320,9 @@ struct PM4DmaData { }; struct PM4CmdWaitRegMem { - enum Engine : u32 { Me = 0u, Pfp = 1u }; - enum MemSpace : u32 { Register = 0u, Memory = 1u }; - enum Function : u32 { + enum class Engine : u32 { Me = 0u, Pfp = 1u }; + enum class MemSpace : u32 { Register = 0u, Memory = 1u }; + enum class Function : u32 { Always = 0u, LessThan = 1u, LessThanEqual = 2u, @@ -335,9 +335,9 @@ struct PM4CmdWaitRegMem { PM4Type3Header header; union { - BitField<0, 3, u32> function; - BitField<4, 1, u32> mem_space; - BitField<8, 1, u32> engine; + BitField<0, 3, Function> function; + BitField<4, 1, MemSpace> mem_space; + BitField<8, 1, Engine> engine; u32 raw; }; u32 poll_addr_lo; @@ -400,4 +400,43 @@ struct PM4CmdWriteData { } }; +struct 
PM4CmdEventWriteEos { + enum class Command : u32 { + GdsStore = 1u, + SingalFence = 2u, + }; + + PM4Type3Header header; + union { + u32 event_control; + BitField<0, 6, u32> event_type; ///< Event type written to VGT_EVENT_INITIATOR + BitField<8, 4, u32> event_index; ///< Event index + }; + u32 address_lo; + union { + u32 cmd_info; + BitField<0, 16, u32> address_hi; ///< High bits of address + BitField<29, 3, Command> command; ///< Command + }; + union { + u32 data; ///< Fence value that will be written to memory when event occurs + BitField<0, 16, u32> + gds_index; ///< Indexed offset from the start of the segment within the partition + BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS + }; + + u32* Address() const { + return reinterpret_cast(address_lo | u64(address_hi) << 32); + } + + u32 DataDWord() const { + return this->data; + } + + void SignalFence() const { + ASSERT_MSG(command.Value() == Command::SingalFence, "Invalid action on packet"); + *Address() = DataDWord(); + } +}; + } // namespace AmdGpu From 923baf0164a95b6d069b1986d51545446f6cb37d Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 10 May 2024 21:48:01 +0200 Subject: [PATCH 11/15] core: gpu interrupt dispatcher --- src/core/libraries/gnmdriver/gnmdriver.cpp | 13 +++- src/core/libraries/videoout/driver.cpp | 9 ++- src/core/libraries/videoout/driver.h | 8 ++- src/core/libraries/videoout/video_out.cpp | 9 ++- src/core/libraries/videoout/video_out.h | 2 +- src/core/platform.h | 78 ++++++++++++++++++++++ src/video_core/amdgpu/liverpool.cpp | 57 +++------------- src/video_core/amdgpu/liverpool.h | 6 -- src/video_core/amdgpu/pm4_cmds.h | 47 +++++++++++-- 9 files changed, 161 insertions(+), 68 deletions(-) create mode 100644 src/core/platform.h diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 921babfa..d65d03d0 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -7,6 +7,7 @@ 
#include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/libs.h" #include "core/libraries/videoout/video_out.h" +#include "core/platform.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -48,8 +49,12 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { kernel_event.event.udata = udata; eq->addEvent(kernel_event); - liverpool->SetEopCallback( - [=]() { eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); }); + Platform::IrqC::Instance()->Register([=](Platform::InterruptId irq) { + ASSERT_MSG(irq == Platform::InterruptId::GfxEop, + "An unexpected IRQ occured"); // We need to conver IRQ# to event id and do proper + // filtering in trigger function + eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); + }); return ORBIS_OK; } @@ -158,6 +163,8 @@ s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) { } eq->removeEvent(id); + + Platform::IrqC::Instance()->Unregister(); return ORBIS_OK; } @@ -1356,7 +1363,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addr u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, u32 flip_arg) { - LOG_INFO(Lib_GnmDriver, "called"); + LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx); auto* cmdbuf = reinterpret_cast(dcb_gpu_addrs[count - 1]); const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4; diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 5e093c20..5a5d69a3 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -6,6 +6,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" +#include "core/platform.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ 
-199,16 +200,19 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { // Reset flip label req.port->buffer_labels[req.index] = 0; + LOG_INFO(Lib_VideoOut, "Flip done [buf = {}]", req.index); } -bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { +bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, + bool is_eop /*= false*/) { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; auto* frame = renderer->PrepareFrame(group, buffer.address_left); std::scoped_lock lock{mutex}; - if (requests.size() >= 2) { + if (requests.size() >= port->NumRegisteredBuffers()) { + LOG_ERROR(Lib_VideoOut, "Flip queue is full"); return false; } @@ -218,6 +222,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { .index = index, .flip_arg = flip_arg, .submit_tsc = Libraries::Kernel::sceKernelReadTsc(), + .eop = is_eop, }); port->flip_status.flipPendingNum = static_cast(requests.size()); diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 0c1ea582..f8b9ea81 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -34,6 +34,11 @@ struct VideoOutPort { } return index; } + + [[nodiscard]] int NumRegisteredBuffers() const { + return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(), + [](auto& buffer) { return buffer.group_index != -1; }); + } }; struct ServiceThreadParams { @@ -59,7 +64,7 @@ public: int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex); void Flip(std::chrono::microseconds timeout); - bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg); + bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); void Vblank(); @@ -70,6 +75,7 @@ private: s32 index; s64 flip_arg; u64 submit_tsc; + bool eop; }; std::mutex mutex; diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp 
index 555ff083..e5995ab2 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -10,6 +10,7 @@ #include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/video_out.h" #include "core/loader/symbols_resolver.h" +#include "core/platform.h" namespace Libraries::VideoOut { @@ -216,13 +217,17 @@ void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) { *label_addr = reinterpret_cast(port->buffer_labels.data()); } -s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void* unk) { +s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) { auto* port = driver->GetPort(handle); if (!port) { return 0x8029000b; } - // TODO + Platform::IrqC::Instance()->RegisterOnce([=](Platform::InterruptId irq) { + ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured"); + const auto result = driver->SubmitFlip(port, buf_id, arg, true); + ASSERT_MSG(result, "EOP flip submission failed"); + }); return ORBIS_OK; } diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index fc62bc9b..00ea6afb 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -104,7 +104,7 @@ void Vblank(); // Internal system functions void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); -s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void* unk); +s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk); void RegisterLib(Core::Loader::SymbolsResolver* sym); diff --git a/src/core/platform.h b/src/core/platform.h new file mode 100644 index 00000000..442a6d9e --- /dev/null +++ b/src/core/platform.h @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/logging/log.h" +#include 
"common/singleton.h" +#include "common/types.h" +#include "magic_enum.hpp" + +#include +#include +#include +#include + +namespace Platform { + +enum class InterruptId : u32 { + Compute0RelMem = 0u, + Compute1RelMem = 1u, + Compute2RelMem = 2u, + Compute3RelMem = 3u, + Compute4RelMem = 4u, + Compute5RelMem = 5u, + Compute6RelMem = 6u, + GfxEop = 0x40u +}; + +using IrqHandler = std::function; + +struct IrqController { + void RegisterOnce(IrqHandler handler) { + std::unique_lock lock{m_lock}; + one_time_subscribers.emplace(handler); + } + + void Register(IrqHandler handler) { + ASSERT_MSG(!persistent_handler.has_value(), + "Too many persistent handlers"); // Add a slot map if so + { + std::unique_lock lock{m_lock}; + persistent_handler.emplace(handler); + } + } + + void Unregister() { + std::unique_lock lock{m_lock}; + persistent_handler.reset(); + } + + void Signal(InterruptId irq) { + LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq)); + { + std::unique_lock lock{m_lock}; + + if (persistent_handler) { + persistent_handler.value()(irq); + } + + while (!one_time_subscribers.empty()) { + const auto& h = one_time_subscribers.front(); + h(irq); + + one_time_subscribers.pop(); + } + } + } + +private: + std::optional persistent_handler{}; + std::queue one_time_subscribers{}; + std::mutex m_lock{}; +}; + +using IrqC = Common::Singleton; + +} // namespace Platform diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 2e4566f5..f41f4bb3 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/io_file.h" +#include "common/thread.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -11,6 +12,8 @@ namespace AmdGpu { Liverpool::Liverpool() = default; void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { + Common::SetCurrentThreadName("CommandProcessor_Gfx"); + auto* header = 
reinterpret_cast(cmdbuf); u32 processed_cmd_size = 0; @@ -70,54 +73,12 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { } case PM4ItOpcode::EventWriteEos: { const auto* event_eos = reinterpret_cast(header); - switch (event_eos->command.Value()) { - case PM4CmdEventWriteEos::Command::SingalFence: { - event_eos->SignalFence(); - break; - } - default: { - UNREACHABLE(); - } - } + event_eos->SignalFence(); break; } case PM4ItOpcode::EventWriteEop: { const auto* event_eop = reinterpret_cast(header); - const InterruptSelect irq_sel = event_eop->int_sel; - const DataSelect data_sel = event_eop->data_sel; - - // Write back data if required - switch (data_sel) { - case DataSelect::Data32Low: { - *reinterpret_cast(event_eop->Address()) = event_eop->DataDWord(); - break; - } - case DataSelect::Data64: { - *event_eop->Address() = event_eop->DataQWord(); - break; - } - default: { - UNREACHABLE(); - } - } - - switch (irq_sel) { - case InterruptSelect::None: { - // No interrupt - break; - } - case InterruptSelect::IrqWhenWriteConfirm: { - if (eop_callback) { - eop_callback(); - } else { - UNREACHABLE_MSG("EOP callback is not registered"); - } - break; - } - default: { - UNREACHABLE(); - } - } + event_eop->SignalFence(); break; } case PM4ItOpcode::DmaData: { @@ -143,11 +104,9 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { case PM4ItOpcode::WaitRegMem: { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); - ASSERT(wait_reg_mem->function.Value() == PM4CmdWaitRegMem::Function::Equal); - - { - std::unique_lock lock{m_reg_mem}; - cv_reg_mem.wait(lock, [&]() { return wait_reg_mem->Test(); }); + while (!wait_reg_mem->Test()) { + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); } break; } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 34cac432..2e0030fd 100644 --- a/src/video_core/amdgpu/liverpool.h +++ 
b/src/video_core/amdgpu/liverpool.h @@ -624,17 +624,11 @@ public: // reworked with mutiple queues introduction cp.get(); } - void SetEopCallback(auto const& cb) { - eop_callback = cb; - } private: void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes); - std::function eop_callback{}; std::future cp{}; - std::condition_variable cv_reg_mem{}; - std::mutex m_reg_mem{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 91e67a59..c9870168 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -6,6 +6,7 @@ #include #include "common/bit_field.h" #include "common/types.h" +#include "core/platform.h" #include "video_core/amdgpu/pm4_opcodes.h" namespace AmdGpu { @@ -282,8 +283,9 @@ struct PM4CmdEventWriteEop { u32 data_lo; ///< Value that will be written to memory when event occurs u32 data_hi; ///< Value that will be written to memory when event occurs - u64* Address() const { - return reinterpret_cast(address_lo | u64(address_hi) << 32); + template + T* Address() const { + return reinterpret_cast(address_lo | u64(address_hi) << 32); } u32 DataDWord() const { @@ -293,6 +295,36 @@ struct PM4CmdEventWriteEop { u64 DataQWord() const { return data_lo | u64(data_hi) << 32; } + + void SignalFence() const { + switch (data_sel.Value()) { + case DataSelect::Data32Low: { + *Address() = DataDWord(); + break; + } + case DataSelect::Data64: { + *Address() = DataQWord(); + break; + } + default: { + UNREACHABLE(); + } + } + + switch (int_sel.Value()) { + case InterruptSelect::None: { + // No interrupt + break; + } + case InterruptSelect::IrqWhenWriteConfirm: { + Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); + break; + } + default: { + UNREACHABLE(); + } + } + } }; struct PM4DmaData { @@ -434,8 +466,15 @@ struct PM4CmdEventWriteEos { } void SignalFence() const { - ASSERT_MSG(command.Value() == Command::SingalFence, "Invalid action on packet"); - 
*Address() = DataDWord(); + switch (command.Value()) { + case Command::SingalFence: { + *Address() = DataDWord(); + break; + } + default: { + UNREACHABLE(); + } + } } }; From 59e7bbdec47957f8867adeaa81b1939e0424e1bb Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 10 May 2024 22:04:41 +0200 Subject: [PATCH 12/15] kernel: `sceKernelDeleteEqueue` added --- src/core/libraries/kernel/event_queues.cpp | 21 +++++++++++++-------- src/core/libraries/kernel/event_queues.h | 1 + src/core/libraries/kernel/libkernel.cpp | 1 + 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/core/libraries/kernel/event_queues.cpp b/src/core/libraries/kernel/event_queues.cpp index 1cb109ac..4e239784 100644 --- a/src/core/libraries/kernel/event_queues.cpp +++ b/src/core/libraries/kernel/event_queues.cpp @@ -11,29 +11,34 @@ namespace Libraries::Kernel { int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) { if (eq == nullptr) { LOG_ERROR(Kernel_Event, "Event queue is null!"); - return SCE_KERNEL_ERROR_EINVAL; + return ORBIS_KERNEL_ERROR_EINVAL; } if (name == nullptr) { - LOG_ERROR(Kernel_Event, "Event queue name is invalid!"); - return SCE_KERNEL_ERROR_EFAULT; - } - if (name == NULL) { LOG_ERROR(Kernel_Event, "Event queue name is null!"); - return SCE_KERNEL_ERROR_EINVAL; + return ORBIS_KERNEL_ERROR_EINVAL; } // Maximum is 32 including null terminator static constexpr size_t MaxEventQueueNameSize = 32; if (std::strlen(name) > MaxEventQueueNameSize) { LOG_ERROR(Kernel_Event, "Event queue name exceeds 32 bytes!"); - return SCE_KERNEL_ERROR_ENAMETOOLONG; + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; } LOG_INFO(Kernel_Event, "name = {}", name); *eq = new EqueueInternal; (*eq)->setName(std::string(name)); - return SCE_OK; + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq) { + if (eq == nullptr) { + return SCE_KERNEL_ERROR_EBADF; + } + + delete eq; + return ORBIS_OK; } int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, 
SceKernelEvent* ev, int num, int* out, diff --git a/src/core/libraries/kernel/event_queues.h b/src/core/libraries/kernel/event_queues.h index e8cb35a3..df78f9ce 100644 --- a/src/core/libraries/kernel/event_queues.h +++ b/src/core/libraries/kernel/event_queues.h @@ -11,6 +11,7 @@ using SceKernelUseconds = u32; using SceKernelEqueue = EqueueInternal*; int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name); +int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq); int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out, SceKernelUseconds* timo); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index bfda21d0..30e8cad1 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -169,6 +169,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap); // equeue LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue); + LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue); LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue); // misc LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode); From b849b074427b8651410585dedbc6edcfb898bee1 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 10 May 2024 22:58:53 +0200 Subject: [PATCH 13/15] gnm_driver: fixed a typo and fields initialization --- src/core/libraries/gnmdriver/gnmdriver.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index d65d03d0..ba6142a0 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -658,8 +658,7 @@ s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, 
auto* wait_reg_mem = reinterpret_cast(cmdbuf); wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; - wait_reg_mem->function.Assign(PM4CmdWaitRegMem::Function::Equal); - wait_reg_mem->mem_space.Assign(PM4CmdWaitRegMem::MemSpace::Memory); + wait_reg_mem->raw = 0x13u; *reinterpret_cast(&wait_reg_mem->poll_addr_lo) = (label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull; wait_reg_mem->ref = 0u; @@ -1285,7 +1284,7 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf std::array backup{}; std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type)); - ASSERT_MSG(((backup[2] & 3) == 0u) || (backup[1] != PM4CmdNop::PayloadType::PrepareFlip), + ASSERT_MSG(((backup[2] & 3) == 0u) || (backup[1] != PM4CmdNop::PayloadType::PrepareFlipLabel), "Invalid flip packet"); ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index"); @@ -1307,7 +1306,7 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf // Write event to lock the VO surface auto* write_lock = reinterpret_cast(cmdbuf); write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; - write_lock->dst_sel.Assign(5u); + write_lock->raw = 0x500u; *reinterpret_cast(&write_lock->dst_addr_lo) = (label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull; write_lock->data[0] = 1; @@ -1325,7 +1324,7 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf // Write event to update label auto* write_label = reinterpret_cast(cmdbuf + 0x3b); write_label->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; - write_label->dst_sel.Assign(5u); + write_label->raw = 0x500u; write_label->dst_addr_lo = backup[2] & 0xffff'fffcu; write_label->dst_addr_hi = backup[3]; write_label->data[0] = backup[4]; From 37014394fd3c783264f69cd3b4ebd5cd32a240c2 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 10 May 2024 23:16:56 +0200 Subject: [PATCH 14/15] amdgpu: return perfcounter on EOP --- src/video_core/amdgpu/pm4_cmds.h | 5 
+++++ 1 file changed, 5 insertions(+) diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index c9870168..241367b7 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -5,6 +5,7 @@ #include #include "common/bit_field.h" +#include "common/rdtsc.h" #include "common/types.h" #include "core/platform.h" #include "video_core/amdgpu/pm4_opcodes.h" @@ -306,6 +307,10 @@ struct PM4CmdEventWriteEop { *Address() = DataQWord(); break; } + case DataSelect::PerfCounter: { + *Address() = Common::FencedRDTSC(); + break; + } default: { UNREACHABLE(); } From d804a66aa22d2e695ac3df51c5d0a9e7ca05d2f8 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 10 May 2024 23:51:24 +0200 Subject: [PATCH 15/15] review comments applied --- src/core/libraries/gnmdriver/gnmdriver.cpp | 4 ++-- src/core/libraries/kernel/event_queue.cpp | 4 ++-- src/core/platform.h | 26 ++++++++++------------ src/video_core/amdgpu/liverpool.cpp | 3 +-- src/video_core/amdgpu/pm4_cmds.h | 19 ++++++++++++---- 5 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index ba6142a0..141aff6f 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1307,8 +1307,8 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf auto* write_lock = reinterpret_cast(cmdbuf); write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; write_lock->raw = 0x500u; - *reinterpret_cast(&write_lock->dst_addr_lo) = - (label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull; + const auto addr = (label_addr + buf_idx * sizeof(label_addr)) & ~0x3ull; + write_lock->Address(addr); write_lock->data[0] = 1; auto* nop = reinterpret_cast(cmdbuf + 5); diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index d1004f61..023811da 100644 --- 
a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -21,8 +21,8 @@ int EqueueInternal::addEvent(const EqueueEvent& event) { } int EqueueInternal::removeEvent(u64 id) { - const auto& event_q = std::find_if(m_events.cbegin(), m_events.cend(), - [id](auto& ev) { return ev.event.ident == id; }); + const auto& event_q = + std::ranges::find_if(m_events, [id](auto& ev) { return ev.event.ident == id; }); ASSERT(event_q != m_events.cend()); m_events.erase(event_q); return 0; diff --git a/src/core/platform.h b/src/core/platform.h index 442a6d9e..4d3f4b96 100644 --- a/src/core/platform.h +++ b/src/core/platform.h @@ -38,10 +38,9 @@ struct IrqController { void Register(IrqHandler handler) { ASSERT_MSG(!persistent_handler.has_value(), "Too many persistent handlers"); // Add a slot map if so - { - std::unique_lock lock{m_lock}; - persistent_handler.emplace(handler); - } + + std::unique_lock lock{m_lock}; + persistent_handler.emplace(handler); } void Unregister() { @@ -50,20 +49,19 @@ struct IrqController { } void Signal(InterruptId irq) { + std::unique_lock lock{m_lock}; + LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq)); - { - std::unique_lock lock{m_lock}; - if (persistent_handler) { - persistent_handler.value()(irq); - } + if (persistent_handler) { + persistent_handler.value()(irq); + } - while (!one_time_subscribers.empty()) { - const auto& h = one_time_subscribers.front(); - h(irq); + while (!one_time_subscribers.empty()) { + const auto& h = one_time_subscribers.front(); + h(irq); - one_time_subscribers.pop(); - } + one_time_subscribers.pop(); } } diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index f41f4bb3..58e36017 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -90,8 +90,7 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); const u32 
data_size = (header->type3.count.Value() - 2) * 4; if (!write_data->wr_one_addr.Value()) { - std::memcpy(reinterpret_cast(write_data->Address()), write_data->data, - data_size); + std::memcpy(write_data->Address(), write_data->data, data_size); } else { UNREACHABLE(); } diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 241367b7..e26830cd 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -428,12 +428,23 @@ struct PM4CmdWriteData { BitField<30, 1, u32> engine_sel; u32 raw; }; - u32 dst_addr_lo; - u32 dst_addr_hi; + union { + struct { + u32 dst_addr_lo; + u32 dst_addr_hi; + }; + u64 addr64; + }; u32 data[0]; - uintptr_t Address() const { - return (uintptr_t(dst_addr_hi) << 32) | dst_addr_lo; + template + void Address(T addr) { + addr64 = reinterpret_cast(addr); + } + + template + T* Address() const { + return reinterpret_cast(addr64); } };