diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 5cb7f5a9..b79ef820 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -23,6 +23,9 @@ static std::unique_ptr liverpool; // support is not important and can be ignored for a while. static constexpr bool g_fair_hw_init = false; +// In case if `submitDone` is issued we need to block submissions until GPU idle +static u32 submission_lock{}; + // Write a special ending NOP packet with N DWs data block template static inline u32* WriteTrailingNop(u32* cmdbuf) { @@ -50,18 +53,20 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { eq->addEvent(kernel_event); Platform::IrqC::Instance()->Register( - Platform::InterruptId::GfxEop, [=](Platform::InterruptId irq) { + Platform::InterruptId::GfxEop, + [=](Platform::InterruptId irq) { ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured"); // We need to conver IRQ# to event id and do // proper filtering in trigger function eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); - }); + }, + eq); return ORBIS_OK; } int PS4_SYSV_ABI sceGnmAreSubmitsAllowed() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; + LOG_TRACE(Lib_GnmDriver, "called"); + return submission_lock == 0; } int PS4_SYSV_ABI sceGnmBeginWorkload() { @@ -165,7 +170,7 @@ s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) { eq->removeEvent(id); - Platform::IrqC::Instance()->Unregister(Platform::InterruptId::GfxEop); + Platform::IrqC::Instance()->Unregister(Platform::InterruptId::GfxEop, eq); return ORBIS_OK; } @@ -1411,6 +1416,14 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ } } + if (submission_lock != 0) { + liverpool->WaitGpuIdle(); + + // Suspend logic goes here + + submission_lock = 0; + } + for (auto cbpair = 0u; cbpair < count; ++cbpair) { const auto* ccb = ccb_gpu_addrs ? ccb_gpu_addrs[cbpair] : nullptr; const auto ccb_size = ccb_sizes_in_bytes ? ccb_sizes_in_bytes[cbpair] : 0; @@ -1428,9 +1441,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() { int PS4_SYSV_ABI sceGnmSubmitDone() { LOG_INFO(Lib_GnmDriver, "called"); - - liverpool->SubmitDone(); - + submission_lock = true; return ORBIS_OK; } diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 023811da..9243609d 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -12,7 +12,6 @@ EqueueInternal::~EqueueInternal() = default; int EqueueInternal::addEvent(const EqueueEvent& event) { std::scoped_lock lock{m_mutex}; - ASSERT(m_events.empty()); ASSERT(!event.isTriggered); // TODO check if event is already exists and return it. Currently we just add in m_events array @@ -21,6 +20,8 @@ int EqueueInternal::addEvent(const EqueueEvent& event) { } int EqueueInternal::removeEvent(u64 id) { + std::scoped_lock lock{m_mutex}; + const auto& event_q = std::ranges::find_if(m_events, [id](auto& ev) { return ev.event.ident == id; }); ASSERT(event_q != m_events.cend()); @@ -51,12 +52,15 @@ int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) { } bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) { - std::scoped_lock lock{m_mutex}; + { + std::scoped_lock lock{m_mutex}; - ASSERT(m_events.size() <= 1); - - auto& event = m_events[0]; - event.trigger(trigger_data); + for (auto& event : m_events) { + if (event.event.ident == ident) { // event filter? + event.trigger(trigger_data); + } + } + } m_cond.notify_one(); return true; @@ -65,12 +69,11 @@ bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) { int EqueueInternal::getTriggeredEvents(SceKernelEvent* ev, int num) { int ret = 0; - ASSERT(m_events.size() <= 1); - auto& event = m_events[0]; - - if (event.isTriggered) { - ev[ret++] = event.event; - event.reset(); + for (auto& event : m_events) { + if (event.isTriggered) { + ev[ret++] = event.event; + event.reset(); + } } return ret; diff --git a/src/core/libraries/kernel/event_queues.cpp b/src/core/libraries/kernel/event_queues.cpp index 4e239784..d8d3bfdf 100644 --- a/src/core/libraries/kernel/event_queues.cpp +++ b/src/core/libraries/kernel/event_queues.cpp @@ -34,7 +34,7 @@ int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) { int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq) { if (eq == nullptr) { - return SCE_KERNEL_ERROR_EBADF; + return ORBIS_KERNEL_ERROR_EBADF; } delete eq; @@ -46,7 +46,7 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int LOG_INFO(Kernel_Event, "num = {}", num); if (eq == nullptr) { - return SCE_KERNEL_ERROR_EBADF; + return ORBIS_KERNEL_ERROR_EBADF; } if (ev == nullptr) { @@ -71,7 +71,31 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int } } - return SCE_OK; + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) { + if (eq == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + Kernel::EqueueEvent event{}; + event.isTriggered = false; + event.event.ident = id; + event.event.filter = Kernel::EVFILT_USER; + event.event.udata = 0; + event.event.fflags = 0; + event.event.data = 0; + + return eq->addEvent(event); +} + +void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev) { + if (!ev) { + return nullptr; + } + + return ev->udata; } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/event_queues.h b/src/core/libraries/kernel/event_queues.h index df78f9ce..3c08ed5f 100644 --- a/src/core/libraries/kernel/event_queues.h +++ b/src/core/libraries/kernel/event_queues.h @@ -14,5 +14,7 @@ int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name); int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq); int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out, SceKernelUseconds* timo); +void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev); +int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id); } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index ee7f7ef4..3aced046 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -196,6 +196,8 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue); LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue); LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue); + LIB_FUNCTION("vz+pg2zdopI", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventUserData); + LIB_FUNCTION("4R6-OvI2cEA", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEvent); // misc LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode); LIB_FUNCTION("Ou3iL1abvng", "libkernel", 1, "libkernel", 1, 1, stack_chk_fail); diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index cb4fecd3..d114b9c8 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -243,6 +243,13 @@ void VideoOutDriver::Vblank() { vblank_status.count++; vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + + // Trigger flip events for the port. + for (auto& event : main_port.vblank_events) { + if (event != nullptr) { + event->triggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK, Kernel::EVFILT_VIDEO_OUT, nullptr); + } + } } } // namespace Libraries::VideoOut diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index f8b9ea81..5c2bef68 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -25,6 +25,7 @@ struct VideoOutPort { FlipStatus flip_status; SceVideoOutVblankStatus vblank_status; std::vector flip_events; + std::vector vblank_events; int flip_rate = 0; s32 FindFreeGroup() const { diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 6f5ccc39..a1f971b5 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -60,6 +60,31 @@ s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, return eq->addEvent(event); } +s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata) { + LOG_INFO(Lib_VideoOut, "handle = {}", handle); + + auto* port = driver->GetPort(handle); + if (port == nullptr) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; + } + + if (eq == nullptr) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_EVENT_QUEUE; + } + + Kernel::EqueueEvent event{}; + event.isTriggered = false; + event.event.ident = SCE_VIDEO_OUT_EVENT_VBLANK; + event.event.filter = Kernel::EVFILT_VIDEO_OUT; + event.event.udata = udata; + event.event.fflags = 0; + event.event.data = 0; + event.filter.data = port; + + port->vblank_events.push_back(eq); + return eq->addEvent(event); +} + s32 PS4_SYSV_ABI sceVideoOutRegisterBuffers(s32 handle, s32 startIndex, void* const* addresses, s32 bufferNum, const BufferAttribute* attribute) { if (!addresses || !attribute) { @@ -243,6 +268,8 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) { sceVideoOutRegisterBuffers); LIB_FUNCTION("HXzjK9yI30k", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutAddFlipEvent); + LIB_FUNCTION("Xru92wHJRmg", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, + sceVideoOutAddVblankEvent); LIB_FUNCTION("CBiu4mCE1DA", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutSetFlipRate); LIB_FUNCTION("i6-sR91Wt-4", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 00ea6afb..b36520a2 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -88,6 +88,7 @@ void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, Pixe u32 tilingMode, u32 aspectRatio, u32 width, u32 height, u32 pitchInPixel); s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); +s32 PS4_SYSV_ABI sceVideoOutAddVBlankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); s32 PS4_SYSV_ABI sceVideoOutRegisterBuffers(s32 handle, s32 startIndex, void* const* addresses, s32 bufferNum, const BufferAttribute* attribute); s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate); diff --git a/src/core/platform.h b/src/core/platform.h index 24b267da..93f30f05 100644 --- a/src/core/platform.h +++ b/src/core/platform.h @@ -12,6 +12,7 @@ #include #include #include +#include #include namespace Platform { @@ -38,21 +39,21 @@ struct IrqController { ctx.one_time_subscribers.emplace(handler); } - void Register(InterruptId irq, IrqHandler handler) { + void Register(InterruptId irq, IrqHandler handler, void* uid) { ASSERT_MSG(static_cast(irq) < irq_contexts.size(), "Invalid IRQ number"); auto& ctx = irq_contexts[static_cast(irq)]; - ASSERT_MSG(!ctx.persistent_handler.has_value(), - "Too many persistent handlers"); // Add a slot map if so std::unique_lock lock{ctx.m_lock}; - ctx.persistent_handler.emplace(handler); + ASSERT_MSG(ctx.persistent_handlers.find(uid) == ctx.persistent_handlers.cend(), + "The handler is already registered!"); + ctx.persistent_handlers.emplace(uid, handler); } - void Unregister(InterruptId irq) { + void Unregister(InterruptId irq, void* uid) { ASSERT_MSG(static_cast(irq) < irq_contexts.size(), "Invalid IRQ number"); auto& ctx = irq_contexts[static_cast(irq)]; std::unique_lock lock{ctx.m_lock}; - ctx.persistent_handler.reset(); + ctx.persistent_handlers.erase(uid); } void Signal(InterruptId irq) { @@ -62,8 +63,8 @@ struct IrqController { LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq)); - if (ctx.persistent_handler) { - ctx.persistent_handler.value()(irq); + for (auto& [uid, h] : ctx.persistent_handlers) { + h(irq); } while (!ctx.one_time_subscribers.empty()) { @@ -76,7 +77,7 @@ struct IrqController { private: struct IrqContext { - std::optional persistent_handler{}; + std::unordered_map persistent_handlers{}; std::queue one_time_subscribers{}; std::mutex m_lock{}; }; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c8e8eb0d..b40c9ba5 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -620,14 +620,6 @@ public: ~Liverpool(); void SubmitGfx(std::span dcb, std::span ccb) { - if (submission_lock) { - WaitGpuIdle(); - - // Suspend logic goes here - - submission_lock = false; - } - { std::scoped_lock lock{m_ring_access}; gfx_ring.emplace(dcb); @@ -636,22 +628,18 @@ public: } cv_submit.notify_one(); } - void SubmitDone() { - submission_lock = true; - } + + void WaitGpuIdle(); private: void ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes); void Process(std::stop_token stoken); - void WaitGpuIdle(); std::jthread process_thread{}; std::queue> gfx_ring{}; std::condition_variable_any cv_submit{}; std::condition_variable cv_complete{}; std::mutex m_ring_access{}; - - bool submission_lock{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);