diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index b0706dfd..141aff6f 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -6,6 +6,8 @@ #include "core/libraries/error_codes.h" #include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/libs.h" +#include "core/libraries/videoout/video_out.h" +#include "core/platform.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -26,12 +28,33 @@ template static inline u32* WriteTrailingNop(u32* cmdbuf) { auto* nop = reinterpret_cast(cmdbuf); nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1}; - nop->data_block[0] = 0; // only one out of `data_block_size` is initialized + nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized return cmdbuf + data_block_size + 1 /* header */; } -int PS4_SYSV_ABI sceGnmAddEqEvent() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { + LOG_TRACE(Lib_GnmDriver, "called"); + ASSERT_MSG(id == SceKernelEvent::Type::GfxEop); + + if (!eq) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + EqueueEvent kernel_event{}; + kernel_event.event.ident = id; + kernel_event.event.filter = EVFILT_GRAPHICS_CORE; + kernel_event.event.flags = 1; + kernel_event.event.fflags = 0; + kernel_event.event.data = id; + kernel_event.event.udata = udata; + eq->addEvent(kernel_event); + + Platform::IrqC::Instance()->Register([=](Platform::InterruptId irq) { + ASSERT_MSG(irq == Platform::InterruptId::GfxEop, + "An unexpected IRQ occured"); // We need to conver IRQ# to event id and do proper + // filtering in trigger function + eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); + }); return ORBIS_OK; } @@ -63,7 +86,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add wait_reg_mem->poll_addr_hi = u32(addr >> 32u); wait_reg_mem->ref = ref; wait_reg_mem->mask = mask; - wait_reg_mem->poll_interval = 10; + wait_reg_mem->poll_interval = 10u; WriteTrailingNop<2>(cmdbuf + 7); return ORBIS_OK; @@ -131,8 +154,17 @@ int PS4_SYSV_ABI sceGnmDebugHardwareStatus() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmDeleteEqEvent() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) { + LOG_TRACE(Lib_GnmDriver, "called"); + ASSERT_MSG(id == SceKernelEvent::Type::GfxEop); + + if (!eq) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + eq->removeEvent(id); + + Platform::IrqC::Instance()->Unregister(); return ORBIS_OK; } @@ -205,14 +237,14 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) { 0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1 cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS - cmdbuf = WriteHeader( + cmdbuf = WriteHeader( cmdbuf, 6); // for some reason the packet indicates larger size cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u); cmdbuf = WriteHeader(cmdbuf, 0xef); cmdbuf = WriteBody(cmdbuf, 0xau, 0u); } else { - cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff); } return 0x100; // it is a size, not a retcode } @@ -321,7 +353,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) { if constexpr (g_fair_hw_init) { ASSERT_MSG(0, "Not implemented"); } else { - cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff); } return 0x100; // it is a size, not a retcode } @@ -335,7 +367,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) { if constexpr (g_fair_hw_init) { ASSERT_MSG(0, "Not implemented"); } else { - cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff); } return 0x100; // it is a size, not a retcode } @@ -565,9 +597,9 @@ s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size) { LOG_TRACE(Lib_GnmDriver, "called"); if (cmdbuf && (size == 6)) { - cmdbuf = WritePacket( - cmdbuf, PM4ShaderType::ShaderGraphics, - static_cast(PM4CmdNop::PayloadType::DebugMarkerPop), 0u, 0u, 0u, 0u); + cmdbuf = + WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, + PM4CmdNop::PayloadType::DebugMarkerPop, 0u, 0u, 0u, 0u); return ORBIS_OK; } return -1; @@ -588,7 +620,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke auto* nop = reinterpret_cast(cmdbuf); nop->header = PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics}; - nop->data_block[0] = static_cast(PM4CmdNop::PayloadType::DebugMarkerPush); + nop->data_block[0] = PM4CmdNop::PayloadType::DebugMarkerPush; const auto marker_len = len + 1; std::memcpy(&nop->data_block[1], marker, marker_len); std::memset(reinterpret_cast(&nop->data_block[1]) + marker_len, 0, @@ -614,8 +646,24 @@ int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmInsertWaitFlipDone() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (size != 7) { + return -1; + } + + uintptr_t label_addr{}; + VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr); + + auto* wait_reg_mem = reinterpret_cast(cmdbuf); + wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; + wait_reg_mem->raw = 0x13u; + *reinterpret_cast(&wait_reg_mem->poll_addr_lo) = + (label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull; + wait_reg_mem->ref = 0u; + wait_reg_mem->mask = 0xffff'ffffu; + wait_reg_mem->poll_interval = 10u; return ORBIS_OK; } @@ -824,8 +872,48 @@ int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + + // A fullscreen triangle with one uv set + const static u32 shader_code[] = { + 0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007 + 0x36020081u, // v_and_b32 v1, 1, v0 + 0x34020281u, // v_lshlrev_b32 v1, 1, v1 + 0x360000c2u, // v_and_b32 v0, -2, v0 + 0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1 + 0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0 + 0x7e020b01u, // v_cvt_f32_i32 v1, v1 + 0x7e040280u, // v_cvt_f32_i32 v0, v0 + 0x7e0602f2u, // v_mov_b32 v3, 1.0 + 0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done + 0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3 + 0xbf810000u, // s_endpgm + + // OrbShdr header + 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, + 0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u}; + + const auto shader_addr = uintptr_t(&shader_code); // Original address is 0xfe000f10 + const static u32 vs_regs[] = { + u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7}; + + if (shader_id != 0) { + return 0x8eee00ff; + } + + // Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has + // a check for zero in the upper part of shader address. In our case, the address is a + // pointer to a stack memory, so the check will likely fail. To workaround it we will + // repeat set shader functionality here as it is trivial. + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } @@ -960,6 +1048,8 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() { } s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + if (!cmdbuf || size <= 0x1c) { return -1; } @@ -987,7 +1077,6 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3 cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT WriteTrailingNop<11>(cmdbuf); - return ORBIS_OK; } @@ -1186,44 +1275,142 @@ int PS4_SYSV_ABI sceGnmSqttWaitForEvent() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf_idx, u32 flip_mode, + u32 flip_arg, void* unk) { + // check for `prepareFlip` packet + cmdbuf += size - 64; + ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet"); + + std::array backup{}; + std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type)); + + ASSERT_MSG(((backup[2] & 3) == 0u) || (backup[1] != PM4CmdNop::PayloadType::PrepareFlipLabel), + "Invalid flip packet"); + ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index"); + + const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode, + flip_arg, nullptr /*unk*/); + if (flip_result != 0) { + if (flip_result == 0x80290012) { + LOG_ERROR(Lib_GnmDriver, "Flip queue is full"); + return 0x80d11081; + } else { + LOG_ERROR(Lib_GnmDriver, "Flip request failed"); + return flip_result; + } + } + + uintptr_t label_addr{}; + VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr); + + // Write event to lock the VO surface + auto* write_lock = reinterpret_cast(cmdbuf); + write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; + write_lock->raw = 0x500u; + const auto addr = (label_addr + buf_idx * sizeof(label_addr)) & ~0x3ull; + write_lock->Address(addr); + write_lock->data[0] = 1; + + auto* nop = reinterpret_cast(cmdbuf + 5); + + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlip) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x39}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + } else { + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipLabel) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x34}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + + // Write event to update label + auto* write_label = reinterpret_cast(cmdbuf + 0x3b); + write_label->header = PM4Type3Header{PM4ItOpcode::WriteData, 3}; + write_label->raw = 0x500u; + write_label->dst_addr_lo = backup[2] & 0xffff'fffcu; + write_label->dst_addr_hi = backup[3]; + write_label->data[0] = backup[4]; + } + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterruptLabel) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + + auto* write_eop = reinterpret_cast(cmdbuf + 0x3a); + write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4}; + write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000; + write_eop->address_lo = backup[2] & 0xffff'fffcu; + write_eop->data_control = (backup[3] & 0xffffu) | 0x2200'0000u; + write_eop->data_lo = backup[4]; + write_eop->data_hi = 0u; + } + if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterrupt) { + nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33}; + nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip; + + auto* write_eop = reinterpret_cast(cmdbuf + 0x3a); + write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4}; + write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000; + write_eop->address_lo = 0u; + write_eop->data_control = 0x100'0000u; + write_eop->data_lo = 0u; + write_eop->data_hi = 0u; + } + } + return ORBIS_OK; } +s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes, u32 vo_handle, + u32 buf_idx, u32 flip_mode, u32 flip_arg) { + LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx); + + auto* cmdbuf = reinterpret_cast(dcb_gpu_addrs[count - 1]); + const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4; + + const s32 patch_result = + PatchFlipRequest(cmdbuf, size_dw, vo_handle, buf_idx, flip_mode, flip_arg, nullptr /*unk*/); + if (patch_result != ORBIS_OK) { + return patch_result; + } + + return sceGnmSubmitCommandBuffers(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs, + ccb_sizes_in_bytes); +} + int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() { LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes, - void* ccbGpuAddrs[], u32* ccbSizesInBytes) { +s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes) { LOG_INFO(Lib_GnmDriver, "called"); ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!"); - if (!dcbGpuAddrs || !dcbSizesInBytes) { + if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) { LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL"); return 0x80d11000; } for (u32 i = 0; i < count; i++) { - if (dcbSizesInBytes[i] == 0) { + if (dcb_sizes_in_bytes[i] == 0) { LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i); return 0x80d11000; } - if (dcbSizesInBytes[i] > 0x3ffffc) { + if (dcb_sizes_in_bytes[i] > 0x3ffffc) { LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i, - dcbSizesInBytes[i]); + dcb_sizes_in_bytes[i]); return 0x80d11000; } - if (ccbSizesInBytes && ccbSizesInBytes[i] > 0x3ffffc) { + if (ccb_sizes_in_bytes && ccb_sizes_in_bytes[i] > 0x3ffffc) { LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i, - ccbSizesInBytes[i]); + ccb_sizes_in_bytes[i]); return 0x80d11000; } } - liverpool->ProcessCmdList(reinterpret_cast(dcbGpuAddrs[0]), dcbSizesInBytes[0]); + liverpool->Submit(reinterpret_cast(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]); return ORBIS_OK; } @@ -1234,7 +1421,10 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() { } int PS4_SYSV_ABI sceGnmSubmitDone() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); + LOG_INFO(Lib_GnmDriver, "called"); + + liverpool->SubmitDone(); + return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 1f25649a..c1aeef8e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -4,6 +4,7 @@ #pragma once #include "common/types.h" +#include "core/libraries/kernel/event_queues.h" namespace Core::Loader { class SymbolsResolver; @@ -11,7 +12,9 @@ class SymbolsResolver; namespace Libraries::GnmDriver { -int PS4_SYSV_ABI sceGnmAddEqEvent(); +using namespace Kernel; + +s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata); int PS4_SYSV_ABI sceGnmAreSubmitsAllowed(); int PS4_SYSV_ABI sceGnmBeginWorkload(); s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask, @@ -28,7 +31,7 @@ int PS4_SYSV_ABI sceGnmDebuggerSetAddressWatch(); int PS4_SYSV_ABI sceGnmDebuggerWriteGds(); int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister(); int PS4_SYSV_ABI sceGnmDebugHardwareStatus(); -int PS4_SYSV_ABI sceGnmDeleteEqEvent(); +s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id); int PS4_SYSV_ABI sceGnmDestroyWorkloadStream(); int PS4_SYSV_ABI sceGnmDingDong(); int PS4_SYSV_ABI sceGnmDingDongForWorkload(); @@ -104,7 +107,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke int PS4_SYSV_ABI sceGnmInsertSetColorMarker(); int PS4_SYSV_ABI sceGnmInsertSetMarker(); int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker(); -int PS4_SYSV_ABI sceGnmInsertWaitFlipDone(); +s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx); int PS4_SYSV_ABI sceGnmIsCoredumpValid(); int PS4_SYSV_ABI sceGnmIsUserPaEnabled(); int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex(); @@ -137,7 +140,7 @@ s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, const u32* cs_regs); s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs, u32 modifier); int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(); -int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(); +s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier); int PS4_SYSV_ABI sceGnmSetEsShader(); int PS4_SYSV_ABI sceGnmSetGsRingSizes(); int PS4_SYSV_ABI sceGnmSetGsShader(); @@ -191,9 +194,12 @@ int PS4_SYSV_ABI sceGnmSqttStopTrace(); int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer(); int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2(); int PS4_SYSV_ABI sceGnmSqttWaitForEvent(); -int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(); +s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes, u32 vo_handle, + u32 buf_idx, u32 flip_mode, u32 flip_arg); int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(); -int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], +s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes); int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(); diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 8fd70106..023811da 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -20,6 +20,14 @@ int EqueueInternal::addEvent(const EqueueEvent& event) { return 0; } +int EqueueInternal::removeEvent(u64 id) { + const auto& event_q = + std::ranges::find_if(m_events, [id](auto& ev) { return ev.event.ident == id; }); + ASSERT(event_q != m_events.cend()); + m_events.erase(event_q); + return 0; +} + int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) { std::unique_lock lock{m_mutex}; int ret = 0; diff --git a/src/core/libraries/kernel/event_queue.h b/src/core/libraries/kernel/event_queue.h index 12151a0c..745a0ac0 100644 --- a/src/core/libraries/kernel/event_queue.h +++ b/src/core/libraries/kernel/event_queue.h @@ -42,11 +42,22 @@ using ResetFunc = void (*)(EqueueEvent* event); using DeleteFunc = void (*)(EqueueInternal* eq, EqueueEvent* event); struct SceKernelEvent { + enum Type : u64 { + Compute0RelMem = 0x00, + Compute1RelMem = 0x01, + Compute2RelMem = 0x02, + Compute3RelMem = 0x03, + Compute4RelMem = 0x04, + Compute5RelMem = 0x05, + Compute6RelMem = 0x06, + GfxEop = 0x40 + }; + u64 ident = 0; /* identifier for this event */ s16 filter = 0; /* filter for event */ u16 flags = 0; u32 fflags = 0; - s64 data = 0; + u64 data = 0; void* udata = nullptr; /* opaque user data identifier */ }; @@ -80,6 +91,7 @@ public: this->m_name = m_name; } int addEvent(const EqueueEvent& event); + int removeEvent(u64 id); int waitForEvents(SceKernelEvent* ev, int num, u32 micros); bool triggerEvent(u64 ident, s16 filter, void* trigger_data); int getTriggeredEvents(SceKernelEvent* ev, int num); diff --git a/src/core/libraries/kernel/event_queues.cpp b/src/core/libraries/kernel/event_queues.cpp index 1cb109ac..4e239784 100644 --- a/src/core/libraries/kernel/event_queues.cpp +++ b/src/core/libraries/kernel/event_queues.cpp @@ -11,29 +11,34 @@ namespace Libraries::Kernel { int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) { if (eq == nullptr) { LOG_ERROR(Kernel_Event, "Event queue is null!"); - return SCE_KERNEL_ERROR_EINVAL; + return ORBIS_KERNEL_ERROR_EINVAL; } if (name == nullptr) { - LOG_ERROR(Kernel_Event, "Event queue name is invalid!"); - return SCE_KERNEL_ERROR_EFAULT; - } - if (name == NULL) { LOG_ERROR(Kernel_Event, "Event queue name is null!"); - return SCE_KERNEL_ERROR_EINVAL; + return ORBIS_KERNEL_ERROR_EINVAL; } // Maximum is 32 including null terminator static constexpr size_t MaxEventQueueNameSize = 32; if (std::strlen(name) > MaxEventQueueNameSize) { LOG_ERROR(Kernel_Event, "Event queue name exceeds 32 bytes!"); - return SCE_KERNEL_ERROR_ENAMETOOLONG; + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; } LOG_INFO(Kernel_Event, "name = {}", name); *eq = new EqueueInternal; (*eq)->setName(std::string(name)); - return SCE_OK; + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq) { + if (eq == nullptr) { + return SCE_KERNEL_ERROR_EBADF; + } + + delete eq; + return ORBIS_OK; } int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out, diff --git a/src/core/libraries/kernel/event_queues.h b/src/core/libraries/kernel/event_queues.h index e8cb35a3..df78f9ce 100644 --- a/src/core/libraries/kernel/event_queues.h +++ b/src/core/libraries/kernel/event_queues.h @@ -11,6 +11,7 @@ using SceKernelUseconds = u32; using SceKernelEqueue = EqueueInternal*; int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name); +int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq); int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out, SceKernelUseconds* timo); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index bfda21d0..30e8cad1 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -169,6 +169,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap); // equeue LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue); + LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue); LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue); // misc LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode); diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index e1a8b0e9..5a5d69a3 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -6,6 +6,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" +#include "core/platform.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -196,16 +197,22 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { reinterpret_cast(req.flip_arg)); } } + + // Reset flip label + req.port->buffer_labels[req.index] = 0; + LOG_INFO(Lib_VideoOut, "Flip done [buf = {}]", req.index); } -bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { +bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, + bool is_eop /*= false*/) { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; auto* frame = renderer->PrepareFrame(group, buffer.address_left); std::scoped_lock lock{mutex}; - if (requests.size() >= 2) { + if (requests.size() >= port->NumRegisteredBuffers()) { + LOG_ERROR(Lib_VideoOut, "Flip queue is full"); return false; } @@ -215,6 +222,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { .index = index, .flip_arg = flip_arg, .submit_tsc = Libraries::Kernel::sceKernelReadTsc(), + .eop = is_eop, }); port->flip_status.flipPendingNum = static_cast(requests.size()); diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index fac12135..f8b9ea81 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -19,6 +19,8 @@ struct VideoOutPort { bool is_open = false; SceVideoOutResolutionStatus resolution; std::array buffer_slots; + std::array buffer_labels; // should be contiguous in memory + static_assert(sizeof(buffer_labels[0]) == 8u); std::array groups; FlipStatus flip_status; SceVideoOutVblankStatus vblank_status; @@ -32,6 +34,11 @@ struct VideoOutPort { } return index; } + + [[nodiscard]] int NumRegisteredBuffers() const { + return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(), + [](auto& buffer) { return buffer.group_index != -1; }); + } }; struct ServiceThreadParams { @@ -57,7 +64,7 @@ public: int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex); void Flip(std::chrono::microseconds timeout); - bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg); + bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); void Vblank(); @@ -68,6 +75,7 @@ private: s32 index; s64 flip_arg; u64 submit_tsc; + bool eop; }; std::mutex mutex; diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 363bd538..e5995ab2 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -10,6 +10,7 @@ #include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/video_out.h" #include "core/loader/symbols_resolver.h" +#include "core/platform.h" namespace Libraries::VideoOut { @@ -210,6 +211,27 @@ void Vblank() { return driver->Vblank(); } +void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) { + auto* port = driver->GetPort(handle); + ASSERT(port); + *label_addr = reinterpret_cast(port->buffer_labels.data()); +} + +s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) { + auto* port = driver->GetPort(handle); + if (!port) { + return 0x8029000b; + } + + Platform::IrqC::Instance()->RegisterOnce([=](Platform::InterruptId irq) { + ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured"); + const auto result = driver->SubmitFlip(port, buf_id, arg, true); + ASSERT_MSG(result, "EOP flip submission failed"); + }); + + return ORBIS_OK; +} + void RegisterLib(Core::Loader::SymbolsResolver* sym) { driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 951eb6a5..00ea6afb 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -102,6 +102,10 @@ s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); void Flip(std::chrono::microseconds micros); void Vblank(); +// Internal system functions +void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); +s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk); + void RegisterLib(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::VideoOut diff --git a/src/core/platform.h b/src/core/platform.h new file mode 100644 index 00000000..4d3f4b96 --- /dev/null +++ b/src/core/platform.h @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/singleton.h" +#include "common/types.h" +#include "magic_enum.hpp" + +#include +#include +#include +#include + +namespace Platform { + +enum class InterruptId : u32 { + Compute0RelMem = 0u, + Compute1RelMem = 1u, + Compute2RelMem = 2u, + Compute3RelMem = 3u, + Compute4RelMem = 4u, + Compute5RelMem = 5u, + Compute6RelMem = 6u, + GfxEop = 0x40u +}; + +using IrqHandler = std::function; + +struct IrqController { + void RegisterOnce(IrqHandler handler) { + std::unique_lock lock{m_lock}; + one_time_subscribers.emplace(handler); + } + + void Register(IrqHandler handler) { + ASSERT_MSG(!persistent_handler.has_value(), + "Too many persistent handlers"); // Add a slot map if so + + std::unique_lock lock{m_lock}; + persistent_handler.emplace(handler); + } + + void Unregister() { + std::unique_lock lock{m_lock}; + persistent_handler.reset(); + } + + void Signal(InterruptId irq) { + std::unique_lock lock{m_lock}; + + LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq)); + + if (persistent_handler) { + persistent_handler.value()(irq); + } + + while (!one_time_subscribers.empty()) { + const auto& h = one_time_subscribers.front(); + h(irq); + + one_time_subscribers.pop(); + } + } + +private: + std::optional persistent_handler{}; + std::queue one_time_subscribers{}; + std::mutex m_lock{}; +}; + +using IrqC = Common::Singleton; + +} // namespace Platform diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 679cab90..58e36017 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/io_file.h" +#include "common/thread.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -11,6 +12,8 @@ namespace AmdGpu { Liverpool::Liverpool() = default; void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { + Common::SetCurrentThreadName("CommandProcessor_Gfx"); + auto* header = reinterpret_cast(cmdbuf); u32 processed_cmd_size = 0; @@ -25,30 +28,30 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { case PM4ItOpcode::Nop: break; case PM4ItOpcode::SetContextReg: { - auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(®s.reg_array[ContextRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetShReg: { - auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetUconfigReg: { - auto* set_data = reinterpret_cast(header); + const auto* set_data = reinterpret_cast(header); std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::IndexType: { - auto* index_type = reinterpret_cast(header); + const auto* index_type = reinterpret_cast(header); regs.index_buffer_type.raw = index_type->raw; break; } case PM4ItOpcode::DrawIndex2: { - auto* draw_index = reinterpret_cast(header); + const auto* draw_index = reinterpret_cast(header); regs.max_index_size = draw_index->max_size; regs.index_base_address.base_addr_lo = draw_index->index_base_lo; regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); @@ -58,22 +61,52 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; } case PM4ItOpcode::DrawIndexAuto: { - auto* draw_index = reinterpret_cast(header); + const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; // rasterizer->DrawIndex(); break; } + case PM4ItOpcode::DispatchDirect: { + // const auto* dispatch_direct = reinterpret_cast(header); + break; + } + case PM4ItOpcode::EventWriteEos: { + const auto* event_eos = reinterpret_cast(header); + event_eos->SignalFence(); + break; + } case PM4ItOpcode::EventWriteEop: { - auto* event_write = reinterpret_cast(header); - const InterruptSelect irq_sel = event_write->int_sel; - const DataSelect data_sel = event_write->data_sel; - ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64); - *event_write->Address() = event_write->DataQWord(); + const auto* event_eop = reinterpret_cast(header); + event_eop->SignalFence(); break; } case PM4ItOpcode::DmaData: { - auto* dma_data = reinterpret_cast(header); + const auto* dma_data = reinterpret_cast(header); + break; + } + case PM4ItOpcode::WriteData: { + const auto* write_data = reinterpret_cast(header); + ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); + const u32 data_size = (header->type3.count.Value() - 2) * 4; + if (!write_data->wr_one_addr.Value()) { + std::memcpy(write_data->Address(), write_data->data, data_size); + } else { + UNREACHABLE(); + } + break; + } + case PM4ItOpcode::AcquireMem: { + // const auto* acquire_mem = reinterpret_cast(header); + break; + } + case PM4ItOpcode::WaitRegMem: { + const auto* wait_reg_mem = reinterpret_cast(header); + ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + while (!wait_reg_mem->Test()) { + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); + } break; } default: diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 229b5804..2e0030fd 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -3,10 +3,15 @@ #pragma once -#include +#include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" +#include +#include +#include +#include + namespace AmdGpu { #define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32)) @@ -610,7 +615,20 @@ struct Liverpool { public: Liverpool(); + void Submit(u32* cmdbuf, u32 size_in_bytes) { + ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending"); + cp = std::async(&Liverpool::ProcessCmdList, this, cmdbuf, size_in_bytes); + } + void SubmitDone() { + // This is wrong as `submitDone()` should never be blocking. The behavior will be + // reworked with mutiple queues introduction + cp.get(); + } + +private: void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes); + + std::future cp{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index eb258dc3..e26830cd 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -5,7 +5,9 @@ #include #include "common/bit_field.h" +#include "common/rdtsc.h" #include "common/types.h" +#include "core/platform.h" #include "video_core/amdgpu/pm4_opcodes.h" namespace AmdGpu { @@ -201,13 +203,18 @@ struct PM4CmdNop { PM4Type3Header header; u32 data_block[0]; - enum class PayloadType : u32 { - DebugMarkerPush = 0x68750001, ///< Begin of GPU event scope - DebugMarkerPop = 0x68750002, ///< End of GPU event scope - SetVsharpInUdata = 0x68750004, ///< Indicates that V# will be set in the next packet - SetTsharpInUdata = 0x68750005, ///< Indicates that T# will be set in the next packet - SetSsharpInUdata = 0x68750006, ///< Indicates that S# will be set in the next packet - DebugColorMarkerPush = 0x6875000e, ///< Begin of GPU event scope with color + enum PayloadType : u32 { + DebugMarkerPush = 0x68750001u, ///< Begin of GPU event scope + DebugMarkerPop = 0x68750002u, ///< End of GPU event scope + SetVsharpInUdata = 0x68750004u, ///< Indicates that V# will be set in the next packet + SetTsharpInUdata = 0x68750005u, ///< Indicates that T# will be set in the next packet + SetSsharpInUdata = 0x68750006u, ///< Indicates that S# will be set in the next packet + DebugColorMarkerPush = 0x6875000eu, ///< Begin of GPU event scope with color + PatchedFlip = 0x68750776u, ///< Patched flip marker + PrepareFlip = 0x68750777u, ///< Flip marker + PrepareFlipLabel = 0x68750778u, ///< Flip marker with label address + PrepareFlipInterrupt = 0x68750780u, ///< Flip marker with interrupt + PrepareFlipInterruptLabel = 0x68750781u, ///< Flip marker with interrupt and label }; }; @@ -277,13 +284,52 @@ struct PM4CmdEventWriteEop { u32 data_lo; ///< Value that will be written to memory when event occurs u32 data_hi; ///< Value that will be written to memory when event occurs - u64* Address() const { - return reinterpret_cast(address_lo | u64(address_hi) << 32); + template + T* Address() const { + return reinterpret_cast(address_lo | u64(address_hi) << 32); + } + + u32 DataDWord() const { + return data_lo; } u64 DataQWord() const { return data_lo | u64(data_hi) << 32; } + + void SignalFence() const { + switch (data_sel.Value()) { + case DataSelect::Data32Low: { + *Address() = DataDWord(); + break; + } + case DataSelect::Data64: { + *Address() = DataQWord(); + break; + } + case DataSelect::PerfCounter: { + *Address() = Common::FencedRDTSC(); + break; + } + default: { + UNREACHABLE(); + } + } + + switch (int_sel.Value()) { + case InterruptSelect::None: { + // No interrupt + break; + } + case InterruptSelect::IrqWhenWriteConfirm: { + Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); + break; + } + default: { + UNREACHABLE(); + } + } + } }; struct PM4DmaData { @@ -311,11 +357,24 @@ struct PM4DmaData { }; struct PM4CmdWaitRegMem { + enum class Engine : u32 { Me = 0u, Pfp = 1u }; + enum class MemSpace : u32 { Register = 0u, Memory = 1u }; + enum class Function : u32 { + Always = 0u, + LessThan = 1u, + LessThanEqual = 2u, + Equal = 3u, + NotEqual = 4u, + GreaterThanEqual = 5u, + GreaterThan = 6u, + Reserved = 7u + }; + PM4Type3Header header; union { - BitField<0, 3, u32> function; - BitField<4, 1, u32> mem_space; - BitField<8, 1, u32> engine; + BitField<0, 3, Function> function; + BitField<4, 1, MemSpace> mem_space; + BitField<8, 1, Engine> engine; u32 raw; }; u32 poll_addr_lo; @@ -323,6 +382,116 @@ struct PM4CmdWaitRegMem { u32 ref; u32 mask; u32 poll_interval; + + u32* Address() const { + return reinterpret_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); + } + + bool Test() const { + switch (function.Value()) { + case Function::Always: { + return true; + } + case Function::LessThan: { + return (*Address() & mask) < ref; + } + case Function::LessThanEqual: { + return (*Address() & mask) <= ref; + } + case Function::Equal: { + return (*Address() & mask) == ref; + } + case Function::NotEqual: { + return (*Address() & mask) != ref; + } + case Function::GreaterThanEqual: { + return (*Address() & mask) >= ref; + } + case Function::GreaterThan: { + return (*Address() & mask) > ref; + } + case Function::Reserved: + [[fallthrough]]; + default: { + UNREACHABLE(); + } + } + } +}; + +struct PM4CmdWriteData { + PM4Type3Header header; + union { + BitField<8, 11, u32> dst_sel; + BitField<16, 1, u32> wr_one_addr; + BitField<20, 1, u32> wr_confirm; + BitField<30, 1, u32> engine_sel; + u32 raw; + }; + union { + struct { + u32 dst_addr_lo; + u32 dst_addr_hi; + }; + u64 addr64; + }; + u32 data[0]; + + template + void Address(T addr) { + addr64 = reinterpret_cast(addr); + } + + template + T* Address() const { + return reinterpret_cast(addr64); + } +}; + +struct PM4CmdEventWriteEos { + enum class Command : u32 { + GdsStore = 1u, + SingalFence = 2u, + }; + + PM4Type3Header header; + union { + u32 event_control; + BitField<0, 6, u32> event_type; ///< Event type written to VGT_EVENT_INITIATOR + BitField<8, 4, u32> event_index; ///< Event index + }; + u32 address_lo; + union { + u32 cmd_info; + BitField<0, 16, u32> address_hi; ///< High bits of address + BitField<29, 3, Command> command; ///< Command + }; + union { + u32 data; ///< Fence value that will be written to memory when event occurs + BitField<0, 16, u32> + gds_index; ///< Indexed offset from the start of the segment within the partition + BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS + }; + + u32* Address() const { + return reinterpret_cast(address_lo | u64(address_hi) << 32); + } + + u32 DataDWord() const { + return this->data; + } + + void SignalFence() const { + switch (command.Value()) { + case Command::SingalFence: { + *Address() = DataDWord(); + break; + } + default: { + UNREACHABLE(); + } + } + } }; } // namespace AmdGpu diff --git a/src/video_core/amdgpu/pm4_opcodes.h b/src/video_core/amdgpu/pm4_opcodes.h index 2772716a..fb3fc8c5 100644 --- a/src/video_core/amdgpu/pm4_opcodes.h +++ b/src/video_core/amdgpu/pm4_opcodes.h @@ -49,7 +49,7 @@ enum class PM4ItOpcode : u32 { PremableCntl = 0x4A, DmaData = 0x50, ContextRegRmw = 0x51, - Unknown58 = 0x58, + AcquireMem = 0x58, LoadShReg = 0x5F, LoadConfigReg = 0x60, LoadContextReg = 0x61,