diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2dc1d14..4380fca6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,15 +65,9 @@ if (CLANG_FORMAT)
     set(SRCS ${PROJECT_SOURCE_DIR}/src)
     set(CCOMMENT "Running clang format against all the .h and .cpp files in src/")
     if (WIN32)
-        if(MINGW)
-            add_custom_target(clang-format
-                COMMAND find `cygpath -u ${SRCS}` -iname *.h -o -iname *.cpp -o -iname *.mm | xargs `cygpath -u ${CLANG_FORMAT}` -i
-                COMMENT ${CCOMMENT})
-        else()
-            add_custom_target(clang-format
-                COMMAND powershell.exe -Command "Get-ChildItem '${SRCS}/*' -Include *.cpp,*.h,*.mm -Recurse | Foreach {&'${CLANG_FORMAT}' -i $_.fullname}"
-                COMMENT ${CCOMMENT})
-        endif()
+        add_custom_target(clang-format
+            COMMAND powershell.exe -Command "Get-ChildItem '${SRCS}/*' -Include *.cpp,*.h,*.mm -Recurse | Foreach {&'${CLANG_FORMAT}' -i $_.fullname}"
+            COMMENT ${CCOMMENT})
     else()
         add_custom_target(clang-format
             COMMAND find ${SRCS} -iname *.h -o -iname *.cpp -o -iname *.mm | xargs ${CLANG_FORMAT} -i
@@ -214,6 +208,7 @@ set(COMMON src/common/logging/backend.cpp
     src/common/native_clock.h
     src/common/path_util.cpp
     src/common/path_util.h
+    src/common/polyfill_thread.h
     src/common/rdtsc.cpp
     src/common/rdtsc.h
     src/common/singleton.h
@@ -387,6 +382,8 @@ if (WIN32)
     add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
     add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN)
     add_definitions(-D_TIMESPEC_DEFINED) #needed for conflicts with time.h of windows.h
+    # Target Windows 10 RS5
+    add_definitions(-DNTDDI_VERSION=0x0A000006 -D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00)
 endif()
 
 if(WIN32)
diff --git a/src/common/bounded_threadsafe_queue.h b/src/common/bounded_threadsafe_queue.h
index 46e382f0..5d158720 100644
--- a/src/common/bounded_threadsafe_queue.h
+++ b/src/common/bounded_threadsafe_queue.h
@@ -8,7 +8,7 @@
 #include <cstddef>
 #include <mutex>
 #include <new>
-#include <stop_token>
+#include "common/polyfill_thread.h"
 
 namespace Common {
 
@@ -122,7 +122,7 @@ private:
         } else if constexpr (Mode == PopMode::WaitWithStopToken) {
             // Wait until the queue is not empty.
             std::unique_lock lock{consumer_cv_mutex};
-            consumer_cv.wait(lock, stop_token, [this, read_index] {
+            Common::CondvarWait(consumer_cv, lock, stop_token, [this, read_index] {
                 return read_index != m_write_index.load(std::memory_order::acquire);
             });
             if (stop_token.stop_requested()) {
diff --git a/src/common/polyfill_thread.h b/src/common/polyfill_thread.h
new file mode 100644
index 00000000..12e59a89
--- /dev/null
+++ b/src/common/polyfill_thread.h
@@ -0,0 +1,375 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//
+// TODO: remove this file when jthread is supported by all compilation targets
+//
+
+#pragma once
+
+#include <version>
+
+#ifdef __cpp_lib_jthread
+
+#include <chrono>
+#include <condition_variable>
+#include <stop_token>
+#include <thread>
+#include <utility>
+
+namespace Common {
+
+template <typename Condvar, typename Lock, typename Pred>
+void CondvarWait(Condvar& cv, std::unique_lock<Lock>& lk, std::stop_token token, Pred&& pred) {
+    cv.wait(lk, token, std::forward<Pred>(pred));
+}
+
+template <typename Rep, typename Period>
+bool StoppableTimedWait(std::stop_token token, const std::chrono::duration<Rep, Period>& rel_time) {
+    std::condition_variable_any cv;
+    std::mutex m;
+
+    // Perform the timed wait.
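+    // Note: the stop_token overload of wait_for also wakes when a stop is
+    // requested; negating the predicate result reports whether the full
+    // rel_time elapsed with no stop request.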
+    std::unique_lock lk{m};
+    return !cv.wait_for(lk, token, rel_time, [&] { return token.stop_requested(); });
+}
+
+} // namespace Common
+
+#else
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <thread>
+#include <type_traits>
+#include <utility>
+
+namespace std {
+namespace polyfill {
+
+using stop_state_callback = size_t;
+
+class stop_state {
+public:
+    stop_state() = default;
+    ~stop_state() = default;
+
+    bool request_stop() {
+        unique_lock lk{m_lock};
+
+        if (m_stop_requested) {
+            // Already set, nothing to do.
+            return false;
+        }
+
+        // Mark stop requested.
+        m_stop_requested = true;
+
+        while (!m_callbacks.empty()) {
+            // Get an iterator to the first element.
+            const auto it = m_callbacks.begin();
+
+            // Move the callback function out of the map.
+            function<void()> f;
+            swap(it->second, f);
+
+            // Erase the now-empty map element.
+            m_callbacks.erase(it);
+
+            // Run the callback.
+            if (f) {
+                f();
+            }
+        }
+
+        return true;
+    }
+
+    bool stop_requested() const {
+        unique_lock lk{m_lock};
+        return m_stop_requested;
+    }
+
+    stop_state_callback insert_callback(function<void()> f) {
+        unique_lock lk{m_lock};
+
+        if (m_stop_requested) {
+            // Stop already requested. Don't insert anything,
+            // just run the callback synchronously.
+            if (f) {
+                f();
+            }
+            return 0;
+        }
+
+        // Insert the callback.
+        stop_state_callback ret = ++m_next_callback;
+        m_callbacks.emplace(ret, std::move(f));
+        return ret;
+    }
+
+    void remove_callback(stop_state_callback cb) {
+        unique_lock lk{m_lock};
+        m_callbacks.erase(cb);
+    }
+
+private:
+    mutable recursive_mutex m_lock;
+    map<stop_state_callback, function<void()>> m_callbacks;
+    stop_state_callback m_next_callback{0};
+    bool m_stop_requested{false};
+};
+
+} // namespace polyfill
+
+class stop_token;
+class stop_source;
+struct nostopstate_t {
+    explicit nostopstate_t() = default;
+};
+inline constexpr nostopstate_t nostopstate{};
+
+template <typename Callback>
+class stop_callback;
+
+class stop_token {
+public:
+    stop_token() noexcept = default;
+
+    stop_token(const stop_token&) noexcept = default;
+    stop_token(stop_token&&) noexcept = default;
+    stop_token& operator=(const stop_token&) noexcept = default;
+    stop_token& operator=(stop_token&&) noexcept = default;
+    ~stop_token() = default;
+
+    void swap(stop_token& other) noexcept {
+        m_stop_state.swap(other.m_stop_state);
+    }
+
+    [[nodiscard]] bool stop_requested() const noexcept {
+        return m_stop_state && m_stop_state->stop_requested();
+    }
+    [[nodiscard]] bool stop_possible() const noexcept {
+        return m_stop_state != nullptr;
+    }
+
+private:
+    friend class stop_source;
+    template <typename Callback>
+    friend class stop_callback;
+    stop_token(shared_ptr<polyfill::stop_state> stop_state) : m_stop_state(std::move(stop_state)) {}
+
+private:
+    shared_ptr<polyfill::stop_state> m_stop_state;
+};
+
+class stop_source {
+public:
+    stop_source() : m_stop_state(make_shared<polyfill::stop_state>()) {}
+    explicit stop_source(nostopstate_t) noexcept {}
+
+    stop_source(const stop_source&) noexcept = default;
+    stop_source(stop_source&&) noexcept = default;
+    stop_source& operator=(const stop_source&) noexcept = default;
+    stop_source& operator=(stop_source&&) noexcept = default;
+    ~stop_source() = default;
+    void swap(stop_source& other) noexcept {
+        m_stop_state.swap(other.m_stop_state);
+    }
+
+    [[nodiscard]] stop_token get_token() const noexcept {
+        return stop_token(m_stop_state);
+    }
+    [[nodiscard]] bool stop_possible() const noexcept {
+        return m_stop_state != nullptr;
+    }
+    [[nodiscard]] bool stop_requested() const noexcept {
+        return m_stop_state && m_stop_state->stop_requested();
+    }
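+    // Note: only the call that actually transitions the shared state to
+    // "stop requested" returns true; registered callbacks run synchronously
+    // inside that call.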
+    bool request_stop() noexcept {
+        return m_stop_state && m_stop_state->request_stop();
+    }
+
+private:
+    friend class jthread;
+    explicit stop_source(shared_ptr<polyfill::stop_state> stop_state)
+        : m_stop_state(std::move(stop_state)) {}
+
+private:
+    shared_ptr<polyfill::stop_state> m_stop_state;
+};
+
+template <typename Callback>
+class stop_callback {
+    static_assert(is_nothrow_destructible_v<Callback>);
+    static_assert(is_invocable_v<Callback>);
+
+public:
+    using callback_type = Callback;
+
+    template <typename C>
+        requires constructible_from<Callback, C>
+    explicit stop_callback(const stop_token& st,
+                           C&& cb) noexcept(is_nothrow_constructible_v<Callback, C>)
+        : m_stop_state(st.m_stop_state) {
+        if (m_stop_state) {
+            m_callback = m_stop_state->insert_callback(std::move(cb));
+        }
+    }
+    template <typename C>
+        requires constructible_from<Callback, C>
+    explicit stop_callback(stop_token&& st,
+                           C&& cb) noexcept(is_nothrow_constructible_v<Callback, C>)
+        : m_stop_state(std::move(st.m_stop_state)) {
+        if (m_stop_state) {
+            m_callback = m_stop_state->insert_callback(std::move(cb));
+        }
+    }
+    ~stop_callback() {
+        if (m_stop_state && m_callback) {
+            m_stop_state->remove_callback(m_callback);
+        }
+    }
+
+    stop_callback(const stop_callback&) = delete;
+    stop_callback(stop_callback&&) = delete;
+    stop_callback& operator=(const stop_callback&) = delete;
+    stop_callback& operator=(stop_callback&&) = delete;
+
+private:
+    shared_ptr<polyfill::stop_state> m_stop_state;
+    polyfill::stop_state_callback m_callback;
+};
+
+template <typename Callback>
+stop_callback(stop_token, Callback) -> stop_callback<Callback>;
+
+class jthread {
+public:
+    using id = thread::id;
+    using native_handle_type = thread::native_handle_type;
+
+    jthread() noexcept = default;
+
+    template <typename F, typename... Args,
+              typename = enable_if_t<!is_same_v<remove_cvref_t<F>, jthread>>>
+    explicit jthread(F&& f, Args&&... args)
+        : m_stop_state(make_shared<polyfill::stop_state>()),
+          m_thread(make_thread(std::forward<F>(f), std::forward<Args>(args)...)) {}
+
+    ~jthread() {
+        if (joinable()) {
+            request_stop();
+            join();
+        }
+    }
+
+    jthread(const jthread&) = delete;
+    jthread(jthread&&) noexcept = default;
+    jthread& operator=(const jthread&) = delete;
+
+    jthread& operator=(jthread&& other) noexcept {
+        m_thread.swap(other.m_thread);
+        m_stop_state.swap(other.m_stop_state);
+        return *this;
+    }
+
+    void swap(jthread& other) noexcept {
+        m_thread.swap(other.m_thread);
+        m_stop_state.swap(other.m_stop_state);
+    }
+    [[nodiscard]] bool joinable() const noexcept {
+        return m_thread.joinable();
+    }
+    void join() {
+        m_thread.join();
+    }
+    void detach() {
+        m_thread.detach();
+        m_stop_state.reset();
+    }
+
+    [[nodiscard]] id get_id() const noexcept {
+        return m_thread.get_id();
+    }
+    [[nodiscard]] native_handle_type native_handle() {
+        return m_thread.native_handle();
+    }
+    [[nodiscard]] stop_source get_stop_source() noexcept {
+        return stop_source(m_stop_state);
+    }
+    [[nodiscard]] stop_token get_stop_token() const noexcept {
+        return stop_source(m_stop_state).get_token();
+    }
+    bool request_stop() noexcept {
+        return get_stop_source().request_stop();
+    }
+    [[nodiscard]] static unsigned int hardware_concurrency() noexcept {
+        return thread::hardware_concurrency();
+    }
+
+private:
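+    // Mirrors std::jthread: if the callable accepts a stop_token as its first
+    // parameter, this thread's token is passed through; otherwise the
+    // arguments are forwarded unchanged.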
+    template <typename F, typename... Args>
+    thread make_thread(F&& f, Args&&... args) {
+        if constexpr (is_invocable_v<decay_t<F>, stop_token, decay_t<Args>...>) {
+            return thread(std::forward<F>(f), get_stop_token(), std::forward<Args>(args)...);
+        } else {
+            return thread(std::forward<F>(f), std::forward<Args>(args)...);
+        }
+    }
+
+    shared_ptr<polyfill::stop_state> m_stop_state;
+    thread m_thread;
+};
+
+} // namespace std
+
+namespace Common {
+
+template <typename Condvar, typename Lock, typename Pred>
+void CondvarWait(Condvar& cv, std::unique_lock<Lock>& lk, std::stop_token token, Pred pred) {
+    if (token.stop_requested()) {
+        return;
+    }
+
+    std::stop_callback callback(token, [&] {
+        { std::scoped_lock lk2{*lk.mutex()}; }
+        cv.notify_all();
+    });
+
+    cv.wait(lk, [&] { return pred() || token.stop_requested(); });
+}
+
+template <typename Rep, typename Period>
+bool StoppableTimedWait(std::stop_token token, const std::chrono::duration<Rep, Period>& rel_time) {
+    if (token.stop_requested()) {
+        return false;
+    }
+
+    bool stop_requested = false;
+    std::condition_variable cv;
+    std::mutex m;
+
+    std::stop_callback cb(token, [&] {
+        // Wake up the waiting thread.
+        {
+            std::scoped_lock lk{m};
+            stop_requested = true;
+        }
+        cv.notify_one();
+    });
+
+    // Perform the timed wait.
+    std::unique_lock lk{m};
+    return !cv.wait_for(lk, rel_time, [&] { return stop_requested; });
+}
+
+} // namespace Common
+
+#endif
diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp
index f2313518..226137db 100644
--- a/src/core/libraries/kernel/libkernel.cpp
+++ b/src/core/libraries/kernel/libkernel.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <thread>
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/singleton.h"
diff --git a/src/main.cpp b/src/main.cpp
index 086e62d2..8898ccd4 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -71,8 +71,7 @@ int main(int argc, char* argv[]) {
     if (!found) {
         Libraries::LibC::libcSymbolsRegister(&linker->getHLESymbols());
     }
-    std::jthread mainthread([linker](std::stop_token stop_token, void*) { linker->Execute(); },
-                            nullptr);
+    std::thread mainthread([linker]() { linker->Execute(); });
     Discord::RPC discordRPC;
     discordRPC.init();
     discordRPC.update(Discord::RPCStatus::Idling, "");
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 80cd86a6..88a4f210 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -166,37 +166,34 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr
     Frame* frame = GetRenderFrame();
 
     // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
-    scheduler.Record([frame, vk_image = vk::Image(image.image),
-                      size = image.info.size](vk::CommandBuffer cmdbuf) {
-        const vk::ImageMemoryBarrier pre_barrier{
-            .srcAccessMask = vk::AccessFlagBits::eTransferRead,
-            .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
-            .oldLayout = vk::ImageLayout::eUndefined,
-            .newLayout = vk::ImageLayout::eGeneral,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .image = frame->image,
-            .subresourceRange{
-                .aspectMask = vk::ImageAspectFlagBits::eColor,
-                .baseMipLevel = 0,
-                .levelCount = 1,
-                .baseArrayLayer = 0,
-                .layerCount = VK_REMAINING_ARRAY_LAYERS,
-            },
-        };
+    const vk::ImageMemoryBarrier pre_barrier{
+        .srcAccessMask = vk::AccessFlagBits::eTransferRead,
+        .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
+        .oldLayout = vk::ImageLayout::eUndefined,
+        .newLayout = vk::ImageLayout::eGeneral,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = frame->image,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .baseMipLevel = 0,
+            .levelCount = 1,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
 
-        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
-                               vk::PipelineStageFlagBits::eTransfer,
-                               vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
-        cmdbuf.blitImage(vk_image, vk::ImageLayout::eGeneral, frame->image,
-                         vk::ImageLayout::eGeneral,
-                         MakeImageBlit(size.width, size.height, frame->width, frame->height),
-                         vk::Filter::eLinear);
-    });
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
+                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
+                           {}, {}, pre_barrier);
+    cmdbuf.blitImage(
+        image.image, vk::ImageLayout::eGeneral, frame->image, vk::ImageLayout::eGeneral,
+        MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),
+        vk::Filter::eLinear);
 
     // Flush pending vulkan operations.
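+    // With the worker thread removed, Flush submits the scheduler's single
+    // command buffer on the calling thread.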
     scheduler.Flush(frame->render_ready);
-    scheduler.WaitWorker();
 
     return frame;
 }
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 98bd03ff..1d68055d 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -129,7 +129,8 @@ bool Instance::CreateDevice() {
     shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
     external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
     tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
-    add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+    custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+    index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME);
 
     const auto family_properties = physical_device.getQueueFamilyProperties();
     if (family_properties.empty()) {
@@ -176,16 +177,9 @@ bool Instance::CreateDevice() {
                 .shaderClipDistance = features.shaderClipDistance,
             },
         },
-        vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR{
+        vk::PhysicalDeviceVulkan12Features{
             .timelineSemaphore = true,
         },
-        vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{
-            .extendedDynamicState = true,
-        },
-        vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{
-            .extendedDynamicState2 = true,
-            .extendedDynamicState2LogicOp = true,
-        },
         vk::PhysicalDeviceCustomBorderColorFeaturesEXT{
             .customBorderColors = true,
             .customBorderColorWithoutFormat = true,
@@ -195,6 +189,10 @@ bool Instance::CreateDevice() {
         },
     };
 
+    if (!index_type_uint8) {
+        device_chain.unlink<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
+    }
+
     try {
         device = physical_device.createDeviceUnique(device_chain.get());
     } catch (vk::ExtensionNotPresentError& err) {
diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp
index 9846c6b0..5cc890f6 100644
--- a/src/video_core/renderer_vulkan/vk_platform.cpp
+++ b/src/video_core/renderer_vulkan/vk_platform.cpp
@@ -4,7 +4,7 @@
 // Include the vulkan platform specific header
 #if defined(ANDROID)
 #define VK_USE_PLATFORM_ANDROID_KHR
-#elif defined(WIN32)
+#elif defined(_WIN64)
 #define VK_USE_PLATFORM_WIN32_KHR
 #elif defined(__APPLE__)
 #define VK_USE_PLATFORM_METAL_EXT
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index fe7f7e32..8e265f72 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -2,35 +2,14 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <mutex>
-#include <utility>
-#include "common/thread.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 
 namespace Vulkan {
 
-void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
-    auto command = first;
-    while (command != nullptr) {
-        auto next = command->GetNext();
-        command->Execute(cmdbuf);
-        command->~Command();
-        command = next;
-    }
-    submit = false;
-    command_offset = 0;
-    first = nullptr;
-    last = nullptr;
-}
-
 Scheduler::Scheduler(const Instance& instance)
-    : master_semaphore{instance}, command_pool{instance, &master_semaphore}, use_worker_thread{
-                                                                                 true} {
+    : master_semaphore{instance}, command_pool{instance, &master_semaphore} {
     AllocateWorkerCommandBuffers();
-    if (use_worker_thread) {
-        AcquireNewChunk();
-        worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
-    }
 }
 
 Scheduler::~Scheduler() = default;
 
@@ -47,24 +26,6 @@ void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
     Wait(presubmit_tick);
 }
 
-void Scheduler::WaitWorker() {
-    if (!use_worker_thread) {
-        return;
-    }
-
-    DispatchWork();
-
-    // Ensure the queue is drained.
-    {
-        std::unique_lock ql{queue_mutex};
-        event_cv.wait(ql, [this] { return work_queue.empty(); });
-    }
-
-    // Now wait for execution to finish.
-    // This needs to be done in the same order as WorkerThread.
-    std::scoped_lock el{execution_mutex};
-}
-
 void Scheduler::Wait(u64 tick) {
     if (tick >= master_semaphore.CurrentTick()) {
         // Make sure we are not waiting for the current tick without signalling
@@ -73,73 +34,6 @@
     master_semaphore.Wait(tick);
 }
 
-void Scheduler::DispatchWork() {
-    if (!use_worker_thread || chunk->Empty()) {
-        return;
-    }
-
-    {
-        std::scoped_lock ql{queue_mutex};
-        work_queue.push(std::move(chunk));
-    }
-
-    event_cv.notify_all();
-    AcquireNewChunk();
-}
-
-void Scheduler::WorkerThread(std::stop_token stop_token) {
-    Common::SetCurrentThreadName("VulkanWorker");
-
-    const auto TryPopQueue{[this](auto& work) -> bool {
-        if (work_queue.empty()) {
-            return false;
-        }
-
-        work = std::move(work_queue.front());
-        work_queue.pop();
-        event_cv.notify_all();
-        return true;
-    }};
-
-    while (!stop_token.stop_requested()) {
-        std::unique_ptr<CommandChunk> work;
-
-        {
-            std::unique_lock lk{queue_mutex};
-
-            // Wait for work.
-            event_cv.wait(lk, stop_token, [&] { return TryPopQueue(work); });
-
-            // If we've been asked to stop, we're done.
-            if (stop_token.stop_requested()) {
-                return;
-            }
-
-            // Exchange lock ownership so that we take the execution lock before
-            // the queue lock goes out of scope. This allows us to force execution
-            // to complete in the next step.
-            std::exchange(lk, std::unique_lock{execution_mutex});
-
-            // Perform the work, tracking whether the chunk was a submission
-            // before executing.
-            const bool has_submit = work->HasSubmit();
-            work->ExecuteAll(current_cmdbuf);
-
-            // If the chunk was a submission, reallocate the command buffer.
-            if (has_submit) {
-                AllocateWorkerCommandBuffers();
-            }
-        }
-
-        {
-            std::scoped_lock rl{reserve_mutex};
-
-            // Recycle the chunk back to the reserve.
-            chunk_reserve.emplace_back(std::move(work));
-        }
-    }
-}
-
 void Scheduler::AllocateWorkerCommandBuffers() {
     const vk::CommandBufferBeginInfo begin_info = {
         .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
@@ -152,30 +46,10 @@ void Scheduler::AllocateWorkerCommandBuffers() {
 void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
     const u64 signal_value = master_semaphore.NextTick();
 
-    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
-        std::scoped_lock lock{submit_mutex};
-        master_semaphore.SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value);
-    });
-
+    std::scoped_lock lk{submit_mutex};
+    master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
     master_semaphore.Refresh();
-
-    if (!use_worker_thread) {
-        AllocateWorkerCommandBuffers();
-    } else {
-        chunk->MarkSubmit();
-        DispatchWork();
-    }
-}
-
-void Scheduler::AcquireNewChunk() {
-    std::scoped_lock lock{reserve_mutex};
-    if (chunk_reserve.empty()) {
-        chunk = std::make_unique<CommandChunk>();
-        return;
-    }
-
-    chunk = std::move(chunk_reserve.back());
-    chunk_reserve.pop_back();
+    AllocateWorkerCommandBuffers();
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 9eb456c3..fde48824 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,13 +4,6 @@
 #pragma once
 
 #include <condition_variable>
-#include <array>
-#include <functional>
-#include <memory>
-#include <queue>
-#include <utility>
-
-#include "common/alignment.h"
 #include "common/types.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
@@ -19,8 +12,6 @@ namespace Vulkan {
 
 class Instance;
 
-/// The scheduler abstracts command buffer and fence management with an interface that's able to do
-/// OpenGL-like operations on Vulkan command buffers.
 class Scheduler {
 public:
     explicit Scheduler(const Instance& instance);
@@ -32,34 +23,12 @@ public:
     /// Sends the current execution context to the GPU and waits for it to complete.
     void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
 
-    /// Waits for the worker thread to finish executing everything. After this function returns it's
-    /// safe to touch worker resources.
-    void WaitWorker();
-
     /// Waits for the given tick to trigger on the GPU.
     void Wait(u64 tick);
 
-    /// Sends currently recorded work to the worker thread.
-    void DispatchWork();
-
-    /// Records the command to the current chunk.
-    template <typename T>
-    void Record(T&& command) {
-        if (chunk->Record(command)) {
-            return;
-        }
-        DispatchWork();
-        (void)chunk->Record(command);
-    }
-
-    /// Registers a callback to perform on queue submission.
-    void RegisterOnSubmit(std::function<void()>&& func) {
-        on_submit = std::move(func);
-    }
-
-    /// Registers a callback to perform on queue submission.
-    void RegisterOnDispatch(std::function<void()>&& func) {
-        on_dispatch = std::move(func);
+    /// Returns the current command buffer.
+    vk::CommandBuffer CommandBuffer() const {
+        return current_cmdbuf;
     }
 
     /// Returns the current command buffer tick.
@@ -80,113 +49,15 @@ public:
     std::mutex submit_mutex;
 
 private:
-    class Command {
-    public:
-        virtual ~Command() = default;
-
-        virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
-
-        Command* GetNext() const {
-            return next;
-        }
-
-        void SetNext(Command* next_) {
-            next = next_;
-        }
-
-    private:
-        Command* next = nullptr;
-    };
-
-    template <typename T>
-    class TypedCommand final : public Command {
-    public:
-        explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
-        ~TypedCommand() override = default;
-
-        TypedCommand(TypedCommand&&) = delete;
-        TypedCommand& operator=(TypedCommand&&) = delete;
-
-        void Execute(vk::CommandBuffer cmdbuf) const override {
-            command(cmdbuf);
-        }
-
-    private:
-        T command;
-    };
-
-    class CommandChunk final {
-    public:
-        void ExecuteAll(vk::CommandBuffer cmdbuf);
-
-        template <typename T>
-        bool Record(T& command) {
-            using FuncType = TypedCommand<T>;
-            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
-
-            recorded_counts++;
-            command_offset = Common::alignUp(command_offset, alignof(FuncType));
-            if (command_offset > sizeof(data) - sizeof(FuncType)) {
-                return false;
-            }
-            Command* const current_last = last;
-            last = new (data.data() + command_offset) FuncType(std::move(command));
-
-            if (current_last) {
-                current_last->SetNext(last);
-            } else {
-                first = last;
-            }
-            command_offset += sizeof(FuncType);
-            return true;
-        }
-
-        void MarkSubmit() {
-            submit = true;
-        }
-
-        bool Empty() const {
-            return recorded_counts == 0;
-        }
-
-        bool HasSubmit() const {
-            return submit;
-        }
-
-    private:
-        Command* first = nullptr;
-        Command* last = nullptr;
-
-        std::size_t recorded_counts = 0;
-        std::size_t command_offset = 0;
-        bool submit = false;
-        alignas(std::max_align_t) std::array<u8, 0x8000> data{};
-    };
-
-private:
-    void WorkerThread(std::stop_token stop_token);
-
     void AllocateWorkerCommandBuffers();
 
     void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
 
-    void AcquireNewChunk();
-
 private:
     MasterSemaphore master_semaphore;
     CommandPool command_pool;
-    std::unique_ptr<CommandChunk> chunk;
-    std::queue<std::unique_ptr<CommandChunk>> work_queue;
-    std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
     vk::CommandBuffer current_cmdbuf;
-    std::function<void()> on_submit;
-    std::function<void()> on_dispatch;
-    std::mutex execution_mutex;
-    std::mutex reserve_mutex;
-    std::mutex queue_mutex;
     std::condition_variable_any event_cv;
-    std::jthread worker_thread;
-    bool use_worker_thread;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index e32ddf9b..8dd6156c 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -51,6 +51,18 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
     size.width = attrib.width;
     size.height = attrib.height;
     pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7;
+    const bool is_32bpp = pixel_format == vk::Format::eB8G8R8A8Srgb ||
+                          pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
+    ASSERT(is_32bpp);
+    if (!is_tiled) {
+        guest_size_bytes = pitch * size.height * 4;
+        return;
+    }
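+    // For tiled surfaces pitch counts 128-pixel macrotile columns, so
+    // pitch * 128 is the padded width; the height is padded to the tile height
+    // (128 rows in Neo mode, 64 otherwise) at 4 bytes per pixel.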
+    if (Config::isNeoMode()) {
+        guest_size_bytes = pitch * 128 * ((size.height + 127) & (~127)) * 4;
+    } else {
+        guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4;
+    }
 }
 
 UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
@@ -83,8 +95,9 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
 
 Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
              const ImageInfo& info_, VAddr cpu_addr)
-    : instance{&instance_}, scheduler{&scheduler_}, info{info_},
-      image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr} {
+    : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(),
+                                                                       instance->GetAllocator()},
+      cpu_addr{cpu_addr}, cpu_addr_end{cpu_addr + info.guest_size_bytes} {
     vk::ImageCreateFlags flags{};
     if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
         info.size.width == info.size.height) {
@@ -111,39 +124,27 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
 
     image.Create(image_ci);
 
-    const vk::Image handle = image;
-    scheduler->Record([handle](vk::CommandBuffer cmdbuf) {
-        const vk::ImageMemoryBarrier init_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eNone,
-            .dstAccessMask = vk::AccessFlagBits::eNone,
-            .oldLayout = vk::ImageLayout::eUndefined,
-            .newLayout = vk::ImageLayout::eGeneral,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .image = handle,
-            .subresourceRange{
-                .aspectMask = vk::ImageAspectFlagBits::eColor,
-                .baseMipLevel = 0,
-                .levelCount = VK_REMAINING_MIP_LEVELS,
-                .baseArrayLayer = 0,
-                .layerCount = VK_REMAINING_ARRAY_LAYERS,
-            },
-        };
-        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
-                               vk::PipelineStageFlagBits::eTopOfPipe,
-                               vk::DependencyFlagBits::eByRegion, {}, {}, init_barrier);
-    });
+    const vk::ImageMemoryBarrier init_barrier = {
+        .srcAccessMask = vk::AccessFlagBits::eNone,
+        .dstAccessMask = vk::AccessFlagBits::eNone,
+        .oldLayout = vk::ImageLayout::eUndefined,
+        .newLayout = vk::ImageLayout::eGeneral,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
 
-    const bool is_32bpp = info.pixel_format == vk::Format::eB8G8R8A8Srgb ||
-                          info.pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
-    ASSERT(info.is_tiled && is_32bpp);
-
-    if (Config::isNeoMode()) {
-        guest_size_bytes = info.pitch * 128 * ((info.size.height + 127) & (~127)) * 4;
-    } else {
-        guest_size_bytes = info.pitch * 128 * ((info.size.height + 63) & (~63)) * 4;
-    }
-    cpu_addr_end = cpu_addr + guest_size_bytes;
+    const auto cmdbuf = scheduler->CommandBuffer();
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
+                           vk::PipelineStageFlagBits::eTopOfPipe, vk::DependencyFlagBits::eByRegion,
+                           {}, {}, init_barrier);
 }
 
 Image::~Image() = default;
diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h
index cb8ff052..21c07814 100644
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@@ -38,7 +38,8 @@ struct ImageInfo {
     vk::ImageType type = vk::ImageType::e1D;
     SubresourceExtent resources;
     Extent3D size{1, 1, 1};
-    u32 pitch;
+    u32 pitch = 0;
+    u32 guest_size_bytes = 0;
 };
 
 struct Handle {
@@ -105,12 +106,9 @@ struct Image {
     ImageInfo info;
     UniqueImage image;
     vk::ImageAspectFlags aspect_mask;
-    u32 guest_size_bytes = 0;
-    size_t channel = 0;
     ImageFlagBits flags = ImageFlagBits::CpuModified;
     VAddr cpu_addr = 0;
     VAddr cpu_addr_end = 0;
-    u64 modification_tick = 0;
 };
 
 } // namespace VideoCore
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index 3ec43bb4..bac88c95 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -132,10 +132,15 @@ void TextureCache::RefreshImage(Image& image) {
     image.flags &= ~ImageFlagBits::CpuModified;
 
     // Upload data to the staging buffer.
-    const auto [data, offset, _] = staging.Map(image.guest_size_bytes, 0);
-    ConvertTileToLinear(data, reinterpret_cast<const u8*>(image.cpu_addr), image.info.size.width,
-                        image.info.size.height, Config::isNeoMode());
-    staging.Commit(image.guest_size_bytes);
+    const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 0);
+    const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
+    if (image.info.is_tiled) {
+        ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
+                            Config::isNeoMode());
+    } else {
+        std::memcpy(data, image_data, image.info.guest_size_bytes);
+    }
+    staging.Commit(image.info.guest_size_bytes);
 
     // Copy to the image.
     const vk::BufferImageCopy image_copy = {
@@ -152,11 +157,43 @@ void TextureCache::RefreshImage(Image& image) {
         .imageExtent = {image.info.size.width, image.info.size.height, 1},
     };
 
-    const vk::Buffer src_buffer = staging.Handle();
-    const vk::Image dst_image = image.image;
-    scheduler.Record([src_buffer, dst_image, image_copy](vk::CommandBuffer cmdbuf) {
-        cmdbuf.copyBufferToImage(src_buffer, dst_image, vk::ImageLayout::eGeneral, image_copy);
-    });
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const vk::ImageSubresourceRange range = {
+        .aspectMask = vk::ImageAspectFlagBits::eColor,
+        .baseMipLevel = 0,
+        .levelCount = 1,
+        .baseArrayLayer = 0,
+        .layerCount = VK_REMAINING_ARRAY_LAYERS,
+    };
+    const vk::ImageMemoryBarrier read_barrier = {
+        .srcAccessMask = vk::AccessFlagBits::eShaderRead,
+        .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
+        .oldLayout = vk::ImageLayout::eGeneral,
+        .newLayout = vk::ImageLayout::eTransferDstOptimal,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image.image,
+        .subresourceRange = range,
+    };
+    const vk::ImageMemoryBarrier write_barrier = {
+        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
+        .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead,
+        .oldLayout = vk::ImageLayout::eTransferDstOptimal,
+        .newLayout = vk::ImageLayout::eGeneral,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image.image,
+        .subresourceRange = range,
+    };
+
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllGraphics,
+                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
+                           {}, {}, read_barrier);
+    cmdbuf.copyBufferToImage(staging.Handle(), image.image, vk::ImageLayout::eTransferDstOptimal,
+                             image_copy);
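+    // Transition back to eGeneral so subsequent shader reads and transfer
+    // sources observe the uploaded data.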
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
+                           vk::PipelineStageFlagBits::eAllGraphics,
+                           vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
 }
 
 void TextureCache::RegisterImage(ImageId image_id) {
@@ -164,7 +201,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
                "Trying to register an already registered image");
     image.flags |= ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.guest_size_bytes,
+    ForEachPage(image.cpu_addr, image.info.guest_size_bytes,
                 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
 }
 
@@ -173,7 +210,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
     ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
               "Trying to unregister an already registered image");
     image.flags &= ~ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+    ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
         const auto page_it = page_table.find(page);
         if (page_it == page_table.end()) {
             ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits);
@@ -195,7 +232,7 @@ void TextureCache::TrackImage(Image& image, ImageId image_id) {
         return;
     }
     image.flags |= ImageFlagBits::Tracked;
-    UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+    UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
 }
 
 void TextureCache::UntrackImage(Image& image, ImageId image_id) {
@@ -203,7 +240,7 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) {
         return;
     }
     image.flags &= ~ImageFlagBits::Tracked;
-    UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+    UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1);
 }
 
 void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {