diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 97b1816e..25de48a4 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -9,6 +9,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" +#include "core/platform.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" extern std::unique_ptr renderer; @@ -173,14 +174,19 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { // Update flip status. auto* port = req.port; - auto& flip_status = port->flip_status; - flip_status.count++; - flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); - flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.flipArg = req.flip_arg; - flip_status.currentBuffer = req.index; - flip_status.flipPendingNum = static_cast(requests.size()); + { + std::unique_lock lock{port->port_mutex}; + auto& flip_status = port->flip_status; + flip_status.count++; + flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); + flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + flip_status.flipArg = req.flip_arg; + flip_status.currentBuffer = req.index; + if (req.eop) { + --flip_status.gcQueueNum; + } + --flip_status.flipPendingNum; + } // Trigger flip events for the port. for (auto& event : port->flip_events) { @@ -202,34 +208,54 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop /*= false*/) { + { + std::unique_lock lock{port->port_mutex}; + if (index != -1 && port->flip_status.flipPendingNum >= port->NumRegisteredBuffers()) { + LOG_ERROR(Lib_VideoOut, "Flip queue is full"); + return false; + } + + if (is_eop) { + ++port->flip_status.gcQueueNum; + } + ++port->flip_status.flipPendingNum; // integral GPU and CPU pending flips counter + port->flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); + } + + if (!is_eop) { + // Before processing the flip we need to ask GPU thread to flush command list as at this + // point VO surface is ready to be presented, and we will need have an actual state of + // Vulkan image at the time of frame presentation. + liverpool->SendCommand([=, this]() { + renderer->FlushDraw(); + SubmitFlipInternal(port, index, flip_arg, is_eop); + }); + } else { + SubmitFlipInternal(port, index, flip_arg, is_eop); + } + + return true; +} + +void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, + bool is_eop /*= false*/) { Vulkan::Frame* frame; if (index == -1) { - frame = renderer->PrepareBlankFrame(); + frame = renderer->PrepareBlankFrame(is_eop); } else { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; frame = renderer->PrepareFrame(group, buffer.address_left, is_eop); } - if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) { - LOG_ERROR(Lib_VideoOut, "Flip queue is full"); - return false; - } - std::scoped_lock lock{mutex}; requests.push({ .frame = frame, .port = port, .index = index, .flip_arg = flip_arg, - .submit_tsc = Libraries::Kernel::sceKernelReadTsc(), .eop = is_eop, }); - - port->flip_status.flipPendingNum = static_cast(requests.size()); - port->flip_status.gcQueueNum = 0; - - return true; } void VideoOutDriver::PresentThread(std::stop_token token) { diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 104056de..bee80060 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -29,6 +29,7 @@ struct VideoOutPort { std::vector flip_events; std::vector vblank_events; std::mutex vo_mutex; + std::mutex port_mutex; std::condition_variable vo_cv; std::condition_variable vblank_cv; int flip_rate = 0; @@ -93,7 +94,6 @@ private: VideoOutPort* port; s32 index; s64 flip_arg; - u64 submit_tsc; bool eop; operator bool() const noexcept { @@ -102,6 +102,7 @@ private: }; std::chrono::microseconds Flip(const Request& req); + void SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); void PresentThread(std::stop_token token); std::mutex mutex; diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 15e14662..acfcbad4 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -113,7 +113,9 @@ s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate) { s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle) { LOG_INFO(Lib_VideoOut, "called"); - s32 pending = driver->GetPort(handle)->flip_status.flipPendingNum; + auto* port = driver->GetPort(handle); + std::unique_lock lock{port->port_mutex}; + s32 pending = port->flip_status.flipPendingNum; return pending; } @@ -161,6 +163,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) { return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; } + std::unique_lock lock{port->port_mutex}; *status = port->flip_status; LOG_INFO(Lib_VideoOut, diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index a9665a02..dce2d4b4 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -35,7 +35,7 @@ void Liverpool::Process(std::stop_token stoken) { { std::unique_lock lk{submit_mutex}; Common::CondvarWait(submit_cv, lk, stoken, - [this] { return num_submits != 0 || submit_done; }); + [this] { return num_commands || num_submits || submit_done; }); } if (stoken.stop_requested()) { break; @@ -45,7 +45,23 @@ void Liverpool::Process(std::stop_token stoken) { int qid = -1; - while (num_submits) { + while (num_submits || num_commands) { + + // Process incoming commands with high priority + while (num_commands) { + + Common::UniqueFunction callback{}; + { + std::unique_lock lk{submit_mutex}; + callback = std::move(command_queue.back()); + command_queue.pop(); + } + + callback(); + + --num_commands; + } + qid = (qid + 1) % NumTotalQueues; auto& queue = mapped_queues[qid]; @@ -219,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span #include #include + #include "common/assert.h" #include "common/bit_field.h" #include "common/polyfill_thread.h" #include "common/types.h" +#include "common/unique_function.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -1054,6 +1056,13 @@ public: rasterizer = rasterizer_; } + void SendCommand(Common::UniqueFunction&& func) { + std::scoped_lock lk{submit_mutex}; + command_queue.emplace(std::move(func)); + ++num_commands; + submit_cv.notify_one(); + } + private: struct Task { struct promise_type { @@ -1122,9 +1131,11 @@ private: Libraries::VideoOut::VideoOutPort* vo_port{}; std::jthread process_thread{}; std::atomic num_submits{}; + std::atomic num_commands{}; std::atomic submit_done{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; + std::queue> command_queue{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 113b380e..eab9d527 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -48,13 +48,14 @@ public: VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(info); + texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); auto& image = texture_cache.GetImage(image_id); return PrepareFrameInternal(image, is_eop); } - Frame* PrepareBlankFrame() { + Frame* PrepareBlankFrame(bool is_eop) { auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); - return PrepareFrameInternal(image, true); + return PrepareFrameInternal(image, is_eop); } VideoCore::Image& RegisterVideoOutSurface( @@ -75,6 +76,11 @@ public: void Present(Frame* frame); void RecreateFrame(Frame* frame, u32 width, u32 height); + void FlushDraw() { + SubmitInfo info{}; + draw_scheduler.Flush(info); + } + private: Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true); Frame* GetRenderFrame(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 6b14faac..6bc893b0 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -223,7 +223,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, return RegisterImageView(image_id, view_info); } -void TextureCache::RefreshImage(Image& image) { +void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; @@ -269,8 +269,10 @@ void TextureCache::RefreshImage(Image& image) { return; } - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); + auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler; + sched_ptr->EndRendering(); + + const auto cmdbuf = sched_ptr->CommandBuffer(); image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); const VAddr image_addr = image.info.guest_address; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b3af0ff1..137b6014 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -59,17 +59,17 @@ public: const ImageViewInfo& view_info); /// Updates image contents if it was modified by CPU. - void UpdateImage(ImageId image_id) { + void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) { Image& image = slot_images[image_id]; if (False(image.flags & ImageFlagBits::CpuModified)) { return; } - RefreshImage(image); + RefreshImage(image, custom_scheduler); TrackImage(image, image_id); } /// Reuploads image contents. - void RefreshImage(Image& image); + void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr); /// Retrieves the sampler that matches the provided S# descriptor. [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);