kernel: added HR timers and asio service thread

parent 465cb0413e
commit 0bb5240202

@@ -15,9 +15,9 @@ bool EqueueInternal::AddEvent(EqueueEvent& event) {
     const auto& it = std::ranges::find(m_events, event);
     if (it != m_events.cend()) {
-        *it = event;
+        *it = std::move(event);
     } else {
-        m_events.emplace_back(event);
+        m_events.emplace_back(std::move(event));
     }

     return true;

@@ -37,20 +37,32 @@ bool EqueueInternal::RemoveEvent(u64 id) {
 }

 int EqueueInternal::WaitForEvents(SceKernelEvent* ev, int num, u32 micros) {
-    std::unique_lock lock{m_mutex};
-    int ret = 0;
+    int count = 0;

     const auto predicate = [&] {
-        ret = GetTriggeredEvents(ev, num);
-        return ret > 0;
+        count = GetTriggeredEvents(ev, num);
+        return count > 0;
     };

     if (micros == 0) {
+        std::unique_lock lock{m_mutex};
         m_cond.wait(lock, predicate);
     } else {
+        std::unique_lock lock{m_mutex};
         m_cond.wait_for(lock, std::chrono::microseconds(micros), predicate);
     }
-    return ret;
+
+    if (ev->flags & SceKernelEvent::Flags::OneShot) {
+        for (auto ev_id = 0u; ev_id < count; ++ev_id) {
+            RemoveEvent(ev->ident);
+        }
+    }
+
+    if (HasSmallTimer()) {
+        count = WaitForSmallTimer(ev, num, micros);
+    }
+
+    return count;
 }

 bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) {

@@ -72,7 +84,7 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) {
 }

 int EqueueInternal::GetTriggeredEvents(SceKernelEvent* ev, int num) {
-    int ret = 0;
+    int count = 0;

     for (auto& event : m_events) {
         if (event.IsTriggered()) {

@@ -80,15 +92,53 @@ int EqueueInternal::GetTriggeredEvents(SceKernelEvent* ev, int num) {
                 event.Reset();
             }

-            ev[ret++] = event.event;
+            ev[count++] = event.event;

-            if (ret == num) {
+            if (count == num) {
                 break;
             }
         }
     }

-    return ret;
+    return count;
 }

+bool EqueueInternal::AddSmallTimer(EqueueEvent& ev) {
+    // We assume that only one timer event (with the same ident across calls)
+    // can be posted to the queue, based on observations so far. In the opposite case,
+    // the small timer storage and wait logic should be reworked.
+    ASSERT(!HasSmallTimer() || small_timer_event.event.ident == ev.event.ident);
+    ev.time_added = std::chrono::high_resolution_clock::now();
+    small_timer_event = std::move(ev);
+    return true;
+}
+
+int EqueueInternal::WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros) {
+    int count{};
+
+    ASSERT(num == 1);
+
+    auto curr_clock = std::chrono::high_resolution_clock::now();
+    const auto wait_end_us = curr_clock + std::chrono::microseconds{micros};
+
+    do {
+        curr_clock = std::chrono::high_resolution_clock::now();
+
+        {
+            std::unique_lock lock{m_mutex};
+            if ((curr_clock - small_timer_event.time_added) >
+                std::chrono::microseconds{small_timer_event.event.data}) {
+                ev[count++] = small_timer_event.event;
+                small_timer_event.event.data = 0;
+                break;
+            }
+        }
+
+        std::this_thread::yield();
+
+    } while (curr_clock < wait_end_us);
+
+    return count;
+}
+
 } // namespace Libraries::Kernel

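Note (not part of this commit): WaitForSmallTimer trades CPU time for precision by polling the clock and yielding between polls instead of blocking on the condition variable. A minimal standalone sketch of that spin-wait pattern, using plain std::chrono rather than the emulator's types:

    // Illustrative only, not emulator code.
    #include <chrono>
    #include <thread>

    // Spin until `duration` has elapsed since `start`, yielding between polls so
    // other threads can run; returns the time that actually elapsed.
    inline std::chrono::microseconds SpinWait(std::chrono::high_resolution_clock::time_point start,
                                              std::chrono::microseconds duration) {
        auto now = std::chrono::high_resolution_clock::now();
        while (now - start < duration) {
            std::this_thread::yield();
            now = std::chrono::high_resolution_clock::now();
        }
        return std::chrono::duration_cast<std::chrono::microseconds>(now - start);
    }
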
@@ -7,6 +7,9 @@
 #include <mutex>
 #include <string>
 #include <vector>
+
+#include <boost/asio/steady_timer.hpp>

 #include "common/types.h"

 namespace Libraries::Kernel {

@@ -58,6 +61,7 @@ struct EqueueEvent {
     SceKernelEvent event;
     void* data = nullptr;
     std::chrono::steady_clock::time_point time_added;
+    std::unique_ptr<boost::asio::steady_timer> timer;

     void Reset() {
         is_triggered = false;

@@ -99,10 +103,17 @@ public:
     bool TriggerEvent(u64 ident, s16 filter, void* trigger_data);
     int GetTriggeredEvents(SceKernelEvent* ev, int num);

+    bool AddSmallTimer(EqueueEvent& event);
+    bool HasSmallTimer() const {
+        return small_timer_event.event.data != 0;
+    }
+    int WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros);
+
 private:
     std::string m_name;
     std::mutex m_mutex;
     std::vector<EqueueEvent> m_events;
+    EqueueEvent small_timer_event{};
     std::condition_variable m_cond;
 };

@@ -7,8 +7,24 @@
 #include "core/libraries/error_codes.h"
 #include "core/libraries/kernel/event_queues.h"

+#include <boost/asio/placeholders.hpp>
+
 namespace Libraries::Kernel {

+extern boost::asio::io_context io_context;
+extern void KernelSignalRequest();
+
+static constexpr auto HrTimerSpinlockThresholdUs = 1200u;
+
+static void SmallTimerCallback(const boost::system::error_code& error, SceKernelEqueue eq,
+                               SceKernelEvent kevent) {
+    static EqueueEvent event;
+    event.event = kevent;
+    event.event.data = HrTimerSpinlockThresholdUs;
+    eq->AddSmallTimer(event);
+    eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata);
+}
+
 int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) {
     if (eq == nullptr) {
         LOG_ERROR(Kernel_Event, "Event queue is null!");

@@ -60,17 +76,23 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int
         return ORBIS_KERNEL_ERROR_EINVAL;
     }

-    if (timo == nullptr) { // wait until an event arrives without timing out
-        *out = eq->WaitForEvents(ev, num, 0);
-    }
-
-    if (timo != nullptr) {
-        // Only events that have already arrived at the time of this function call can be received
-        if (*timo == 0) {
-            *out = eq->GetTriggeredEvents(ev, num);
-        } else {
-            // Wait until an event arrives with timing out
-            *out = eq->WaitForEvents(ev, num, *timo);
+    if (eq->HasSmallTimer()) {
+        ASSERT(timo && *timo);
+        *out = eq->WaitForSmallTimer(ev, num, *timo);
+    } else {
+        if (timo == nullptr) { // wait until an event arrives without timing out
+            *out = eq->WaitForEvents(ev, num, 0);
+        }
+
+        if (timo != nullptr) {
+            // Only events that have already arrived at the time of this function call can be
+            // received
+            if (*timo == 0) {
+                *out = eq->GetTriggeredEvents(ev, num);
+            } else {
+                // Wait until an event arrives with timing out
+                *out = eq->WaitForEvents(ev, num, *timo);
+            }
         }
     }

@@ -81,6 +103,51 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int
     return ORBIS_OK;
 }

+s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata) {
+    if (eq == nullptr) {
+        return ORBIS_KERNEL_ERROR_EBADF;
+    }
+
+    if (ts->tv_sec > 100 || ts->tv_nsec < 100'000) {
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }
+    ASSERT(ts->tv_nsec > 1000); // assume 1us resolution
+    const auto total_us = ts->tv_sec * 1000'000 + ts->tv_nsec / 1000;
+
+    EqueueEvent event{};
+    event.event.ident = id;
+    event.event.filter = SceKernelEvent::Filter::HrTimer;
+    event.event.flags = SceKernelEvent::Flags::Add | SceKernelEvent::Flags::OneShot;
+    event.event.fflags = 0;
+    event.event.data = total_us;
+    event.event.udata = udata;
+
+    // HR timers cannot be implemented within the existing event queue architecture due to the
+    // slowness of the notification mechanism. For instance, a 100us timer will lose its precision
+    // as the trigger time drifts by +50-700%, depending on the host PC and workload. To address
+    // this issue, we use a spinlock for small waits (which can be adjusted using
+    // `HrTimerSpinlockThresholdUs`) and fall back to boost asio timers if the time to tick is
+    // large. Even for large delays, we truncate a small portion to complete the wait
+    // using the spinlock, prioritizing precision.
+    if (total_us < HrTimerSpinlockThresholdUs) {
+        return eq->AddSmallTimer(event) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOMEM;
+    }
+
+    event.timer = std::make_unique<boost::asio::steady_timer>(
+        io_context, std::chrono::microseconds(total_us - HrTimerSpinlockThresholdUs));
+
+    event.timer->async_wait(
+        std::bind(SmallTimerCallback, boost::asio::placeholders::error, eq, event.event));
+
+    if (!eq->AddEvent(event)) {
+        return ORBIS_KERNEL_ERROR_ENOMEM;
+    }
+
+    KernelSignalRequest();
+
+    return ORBIS_OK;
+}
+
 int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) {
     if (eq == nullptr) {
         return ORBIS_KERNEL_ERROR_EBADF;

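Note (not part of this commit): a rough guest-side usage sketch of the new entry point. It assumes the usual Orbis-style sceKernelWaitEqueue signature (an out-count plus a pointer to a microsecond timeout, written here as SceKernelUseconds); treat any name or type not visible in the hunks above as an approximation.

    #include <time.h>

    void ExampleHrTimerWait(SceKernelEqueue eq) {
        // 500'000 ns = 500 us, below HrTimerSpinlockThresholdUs (1200 us), so this
        // request is served entirely by the small-timer spin wait. A 5 ms request
        // would instead arm an asio steady_timer for 5000 - 1200 us and finish the
        // final 1200 us on the spin path.
        timespec ts{};
        ts.tv_sec = 0;
        ts.tv_nsec = 500'000;
        sceKernelAddHRTimerEvent(eq, /*id=*/1, &ts, /*udata=*/nullptr);

        SceKernelEvent ev{};
        int out = 0;
        SceKernelUseconds timeout = 1000; // must be non-zero while a small timer is pending
        sceKernelWaitEqueue(eq, &ev, /*num=*/1, &out, &timeout);
    }
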
@@ -3,9 +3,14 @@

+#include <chrono>
+#include <thread>
+
+#include <boost/asio/io_context.hpp>
+
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/singleton.h"
 #include "common/thread.h"
 #include "core/file_format/psf.h"
 #include "core/file_sys/fs.h"
 #include "core/libraries/error_codes.h"
 #include "core/libraries/kernel/cpu_management.h"

@@ -19,6 +24,7 @@
 #include "core/libraries/libs.h"
 #include "core/linker.h"
 #include "core/memory.h"

 #ifdef _WIN64
 #include <io.h>
 #include <objbase.h>

@@ -26,12 +32,43 @@
 #else
 #include <sys/mman.h>
 #endif
 #include <core/file_format/psf.h>

 namespace Libraries::Kernel {

 static u64 g_stack_chk_guard = 0xDEADBEEF54321ABC; // dummy return

+boost::asio::io_context io_context;
+std::mutex m_asio_req;
+std::condition_variable_any cv_asio_req;
+std::atomic<u32> asio_requests;
+std::jthread service_thread;
+
+void KernelSignalRequest() {
+    std::unique_lock lock{m_asio_req};
+    ++asio_requests;
+    cv_asio_req.notify_one();
+}
+
+static void KernelServiceThread(std::stop_token stoken) {
+    Common::SetCurrentThreadName("Kernel_ServiceThread");
+
+    while (!stoken.stop_requested()) {
+        HLE_TRACE;
+        {
+            std::unique_lock lock{m_asio_req};
+            cv_asio_req.wait(lock, stoken, [] { return asio_requests != 0; });
+        }
+        if (stoken.stop_requested()) {
+            break;
+        }
+
+        io_context.run();
+        io_context.reset();
+
+        asio_requests = 0;
+    }
+}
+
 static void* PS4_SYSV_ABI sceKernelGetProcParam() {
     auto* linker = Common::Singleton<Core::Linker>::Instance();
     return reinterpret_cast<void*>(linker->GetProcParam());

@@ -310,6 +347,8 @@ int PS4_SYSV_ABI _sigprocmask() {
 }

 void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
+    service_thread = std::jthread{KernelServiceThread};
+
     // obj
     LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard);
     // misc

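Note (not part of this commit): the service thread is a generic "run boost::asio on demand" loop: callers post work to io_context and then call KernelSignalRequest() to wake the loop, which drains the context with run() and rearms it. A standalone sketch of the same pattern outside the emulator; all names below are invented for the example.

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <stop_token>
    #include <thread>

    #include <boost/asio/io_context.hpp>
    #include <boost/asio/steady_timer.hpp>

    int main() {
        boost::asio::io_context io;
        std::mutex req_mutex;
        std::condition_variable_any req_cv;
        std::atomic<unsigned> requests{0};

        std::jthread service([&](std::stop_token stoken) {
            while (!stoken.stop_requested()) {
                {
                    std::unique_lock lock{req_mutex};
                    // Wakes on notify_one() or when a stop is requested.
                    req_cv.wait(lock, stoken, [&] { return requests != 0; });
                }
                if (stoken.stop_requested()) {
                    break;
                }
                io.run();      // drain the pending async work (here: one expired timer)
                io.restart();  // rearm the context; the diff uses the older name reset()
                requests = 0;
            }
        });

        // Post one timer and wake the service thread, mirroring KernelSignalRequest().
        boost::asio::steady_timer timer(io, std::chrono::milliseconds(5));
        timer.async_wait([](const boost::system::error_code&) { /* timer fired */ });
        {
            std::unique_lock lock{req_mutex};
            ++requests;
            req_cv.notify_one();
        }

        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        return 0; // ~jthread requests stop and joins; the interruptible wait exits
    }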