video_core: CPU flip relay (#415)
* video_core: cpu flip is propagated via gpu thread now
* tentative fix for cpu flips racing
* libraries: videoout: better flip status handling
This commit is contained in:
parent
ad3b6c793c
commit
27cb218584
|
@ -9,6 +9,7 @@
|
||||||
#include "core/libraries/error_codes.h"
|
#include "core/libraries/error_codes.h"
|
||||||
#include "core/libraries/kernel/time_management.h"
|
#include "core/libraries/kernel/time_management.h"
|
||||||
#include "core/libraries/videoout/driver.h"
|
#include "core/libraries/videoout/driver.h"
|
||||||
|
#include "core/platform.h"
|
||||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||||
|
|
||||||
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
||||||
|
@ -173,14 +174,19 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) {
|
||||||
|
|
||||||
// Update flip status.
|
// Update flip status.
|
||||||
auto* port = req.port;
|
auto* port = req.port;
|
||||||
auto& flip_status = port->flip_status;
|
{
|
||||||
flip_status.count++;
|
std::unique_lock lock{port->port_mutex};
|
||||||
flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
|
auto& flip_status = port->flip_status;
|
||||||
flip_status.tsc = Libraries::Kernel::sceKernelReadTsc();
|
flip_status.count++;
|
||||||
flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc();
|
flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
|
||||||
flip_status.flipArg = req.flip_arg;
|
flip_status.tsc = Libraries::Kernel::sceKernelReadTsc();
|
||||||
flip_status.currentBuffer = req.index;
|
flip_status.flipArg = req.flip_arg;
|
||||||
flip_status.flipPendingNum = static_cast<int>(requests.size());
|
flip_status.currentBuffer = req.index;
|
||||||
|
if (req.eop) {
|
||||||
|
--flip_status.gcQueueNum;
|
||||||
|
}
|
||||||
|
--flip_status.flipPendingNum;
|
||||||
|
}
|
||||||
|
|
||||||
// Trigger flip events for the port.
|
// Trigger flip events for the port.
|
||||||
for (auto& event : port->flip_events) {
|
for (auto& event : port->flip_events) {
|
||||||
|
@ -202,34 +208,54 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) {
|
||||||
|
|
||||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||||
bool is_eop /*= false*/) {
|
bool is_eop /*= false*/) {
|
||||||
|
{
|
||||||
|
std::unique_lock lock{port->port_mutex};
|
||||||
|
if (index != -1 && port->flip_status.flipPendingNum >= port->NumRegisteredBuffers()) {
|
||||||
|
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_eop) {
|
||||||
|
++port->flip_status.gcQueueNum;
|
||||||
|
}
|
||||||
|
++port->flip_status.flipPendingNum; // integral GPU and CPU pending flips counter
|
||||||
|
port->flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_eop) {
|
||||||
|
// Before processing the flip we need to ask GPU thread to flush command list as at this
|
||||||
|
// point VO surface is ready to be presented, and we will need to have an actual state of
|
||||||
|
// Vulkan image at the time of frame presentation.
|
||||||
|
liverpool->SendCommand([=, this]() {
|
||||||
|
renderer->FlushDraw();
|
||||||
|
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||||
|
bool is_eop /*= false*/) {
|
||||||
Vulkan::Frame* frame;
|
Vulkan::Frame* frame;
|
||||||
if (index == -1) {
|
if (index == -1) {
|
||||||
frame = renderer->PrepareBlankFrame();
|
frame = renderer->PrepareBlankFrame(is_eop);
|
||||||
} else {
|
} else {
|
||||||
const auto& buffer = port->buffer_slots[index];
|
const auto& buffer = port->buffer_slots[index];
|
||||||
const auto& group = port->groups[buffer.group_index];
|
const auto& group = port->groups[buffer.group_index];
|
||||||
frame = renderer->PrepareFrame(group, buffer.address_left, is_eop);
|
frame = renderer->PrepareFrame(group, buffer.address_left, is_eop);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) {
|
|
||||||
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::scoped_lock lock{mutex};
|
std::scoped_lock lock{mutex};
|
||||||
requests.push({
|
requests.push({
|
||||||
.frame = frame,
|
.frame = frame,
|
||||||
.port = port,
|
.port = port,
|
||||||
.index = index,
|
.index = index,
|
||||||
.flip_arg = flip_arg,
|
.flip_arg = flip_arg,
|
||||||
.submit_tsc = Libraries::Kernel::sceKernelReadTsc(),
|
|
||||||
.eop = is_eop,
|
.eop = is_eop,
|
||||||
});
|
});
|
||||||
|
|
||||||
port->flip_status.flipPendingNum = static_cast<int>(requests.size());
|
|
||||||
port->flip_status.gcQueueNum = 0;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VideoOutDriver::PresentThread(std::stop_token token) {
|
void VideoOutDriver::PresentThread(std::stop_token token) {
|
||||||
|
|
|
@ -29,6 +29,7 @@ struct VideoOutPort {
|
||||||
std::vector<Kernel::SceKernelEqueue> flip_events;
|
std::vector<Kernel::SceKernelEqueue> flip_events;
|
||||||
std::vector<Kernel::SceKernelEqueue> vblank_events;
|
std::vector<Kernel::SceKernelEqueue> vblank_events;
|
||||||
std::mutex vo_mutex;
|
std::mutex vo_mutex;
|
||||||
|
std::mutex port_mutex;
|
||||||
std::condition_variable vo_cv;
|
std::condition_variable vo_cv;
|
||||||
std::condition_variable vblank_cv;
|
std::condition_variable vblank_cv;
|
||||||
int flip_rate = 0;
|
int flip_rate = 0;
|
||||||
|
@ -93,7 +94,6 @@ private:
|
||||||
VideoOutPort* port;
|
VideoOutPort* port;
|
||||||
s32 index;
|
s32 index;
|
||||||
s64 flip_arg;
|
s64 flip_arg;
|
||||||
u64 submit_tsc;
|
|
||||||
bool eop;
|
bool eop;
|
||||||
|
|
||||||
operator bool() const noexcept {
|
operator bool() const noexcept {
|
||||||
|
@ -102,6 +102,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
std::chrono::microseconds Flip(const Request& req);
|
std::chrono::microseconds Flip(const Request& req);
|
||||||
|
void SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
|
||||||
void PresentThread(std::stop_token token);
|
void PresentThread(std::stop_token token);
|
||||||
|
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
|
|
|
@ -113,7 +113,9 @@ s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate) {
|
||||||
|
|
||||||
s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle) {
|
s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle) {
|
||||||
LOG_INFO(Lib_VideoOut, "called");
|
LOG_INFO(Lib_VideoOut, "called");
|
||||||
s32 pending = driver->GetPort(handle)->flip_status.flipPendingNum;
|
auto* port = driver->GetPort(handle);
|
||||||
|
std::unique_lock lock{port->port_mutex};
|
||||||
|
s32 pending = port->flip_status.flipPendingNum;
|
||||||
return pending;
|
return pending;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,6 +163,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) {
|
||||||
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_lock lock{port->port_mutex};
|
||||||
*status = port->flip_status;
|
*status = port->flip_status;
|
||||||
|
|
||||||
LOG_INFO(Lib_VideoOut,
|
LOG_INFO(Lib_VideoOut,
|
||||||
|
|
|
@ -35,7 +35,7 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||||
{
|
{
|
||||||
std::unique_lock lk{submit_mutex};
|
std::unique_lock lk{submit_mutex};
|
||||||
Common::CondvarWait(submit_cv, lk, stoken,
|
Common::CondvarWait(submit_cv, lk, stoken,
|
||||||
[this] { return num_submits != 0 || submit_done; });
|
[this] { return num_commands || num_submits || submit_done; });
|
||||||
}
|
}
|
||||||
if (stoken.stop_requested()) {
|
if (stoken.stop_requested()) {
|
||||||
break;
|
break;
|
||||||
|
@ -45,7 +45,23 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||||
|
|
||||||
int qid = -1;
|
int qid = -1;
|
||||||
|
|
||||||
while (num_submits) {
|
while (num_submits || num_commands) {
|
||||||
|
|
||||||
|
// Process incoming commands with high priority
|
||||||
|
// Drain every pending host command before the next queue submit is serviced.
// command_queue is a std::queue, i.e. entries are consumed oldest-first.
while (num_commands) {
    Common::UniqueFunction<void> callback{};
    {
        std::unique_lock lk{submit_mutex};
        // std::queue::pop() removes the *front* element, so the callback has to be taken
        // from front() as well. Taking it from back() dropped the oldest command without
        // running it and left a moved-from callable in the queue whenever more than one
        // command was pending.
        callback = std::move(command_queue.front());
        command_queue.pop();
    }

    // The lock scope is closed here, so submit_mutex is not held while the callback runs.
    callback();

    // Decrement only after the callback has completed, matching the increment done by
    // SendCommand() once the entry has been queued.
    --num_commands;
}
|
||||||
|
|
||||||
qid = (qid + 1) % NumTotalQueues;
|
qid = (qid + 1) % NumTotalQueues;
|
||||||
|
|
||||||
auto& queue = mapped_queues[qid];
|
auto& queue = mapped_queues[qid];
|
||||||
|
@ -219,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
// In the case of HW, render target memory has alignment as color block operates on
|
// In the case of HW, render target memory has alignment as color block operates on
|
||||||
// tiles. There is no information of actual resource extents stored in CB context
|
// tiles. There is no information of actual resource extents stored in CB context
|
||||||
// regs, so any deduction of it from slices/pitch will lead to a larger surface created.
|
// regs, so any deduction of it from slices/pitch will lead to a larger surface created.
|
||||||
// The same applies to the depth targets. Fortunatelly, the guest always sends
|
// The same applies to the depth targets. Fortunately, the guest always sends
|
||||||
// a trailing NOP packet right after the context regs setup, so we can use the heuristic
|
// a trailing NOP packet right after the context regs setup, so we can use the heuristic
|
||||||
// below and extract the hint to determine actual resource dims.
|
// below and extract the hint to determine actual resource dims.
|
||||||
|
|
||||||
|
|
|
@ -11,10 +11,12 @@
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "common/unique_function.h"
|
||||||
#include "video_core/amdgpu/pixel_format.h"
|
#include "video_core/amdgpu/pixel_format.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
|
|
||||||
|
@ -1054,6 +1056,13 @@ public:
|
||||||
rasterizer = rasterizer_;
|
rasterizer = rasterizer_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Appends `func` to command_queue, bumps the pending-command counter and wakes one
/// waiter on submit_cv. All three steps happen while submit_mutex is held.
void SendCommand(Common::UniqueFunction<void>&& func) {
    const std::scoped_lock guard{submit_mutex};
    command_queue.push(std::move(func));
    num_commands.fetch_add(1);
    submit_cv.notify_one();
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct Task {
|
struct Task {
|
||||||
struct promise_type {
|
struct promise_type {
|
||||||
|
@ -1122,9 +1131,11 @@ private:
|
||||||
Libraries::VideoOut::VideoOutPort* vo_port{};
|
Libraries::VideoOut::VideoOutPort* vo_port{};
|
||||||
std::jthread process_thread{};
|
std::jthread process_thread{};
|
||||||
std::atomic<u32> num_submits{};
|
std::atomic<u32> num_submits{};
|
||||||
|
std::atomic<u32> num_commands{};
|
||||||
std::atomic<bool> submit_done{};
|
std::atomic<bool> submit_done{};
|
||||||
std::mutex submit_mutex;
|
std::mutex submit_mutex;
|
||||||
std::condition_variable_any submit_cv;
|
std::condition_variable_any submit_cv;
|
||||||
|
std::queue<Common::UniqueFunction<void>> command_queue{};
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||||
|
|
|
@ -48,13 +48,14 @@ public:
|
||||||
VAddr cpu_address, bool is_eop) {
|
VAddr cpu_address, bool is_eop) {
|
||||||
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
|
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
|
||||||
const auto image_id = texture_cache.FindImage(info);
|
const auto image_id = texture_cache.FindImage(info);
|
||||||
|
texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
|
||||||
auto& image = texture_cache.GetImage(image_id);
|
auto& image = texture_cache.GetImage(image_id);
|
||||||
return PrepareFrameInternal(image, is_eop);
|
return PrepareFrameInternal(image, is_eop);
|
||||||
}
|
}
|
||||||
|
|
||||||
Frame* PrepareBlankFrame() {
|
Frame* PrepareBlankFrame(bool is_eop) {
|
||||||
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
|
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
|
||||||
return PrepareFrameInternal(image, true);
|
return PrepareFrameInternal(image, is_eop);
|
||||||
}
|
}
|
||||||
|
|
||||||
VideoCore::Image& RegisterVideoOutSurface(
|
VideoCore::Image& RegisterVideoOutSurface(
|
||||||
|
@ -75,6 +76,11 @@ public:
|
||||||
void Present(Frame* frame);
|
void Present(Frame* frame);
|
||||||
void RecreateFrame(Frame* frame, u32 width, u32 height);
|
void RecreateFrame(Frame* frame, u32 width, u32 height);
|
||||||
|
|
||||||
|
void FlushDraw() {
|
||||||
|
SubmitInfo info{};
|
||||||
|
draw_scheduler.Flush(info);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true);
|
Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true);
|
||||||
Frame* GetRenderFrame();
|
Frame* GetRenderFrame();
|
||||||
|
|
|
@ -223,7 +223,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
|
||||||
return RegisterImageView(image_id, view_info);
|
return RegisterImageView(image_id, view_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::RefreshImage(Image& image) {
|
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
|
||||||
// Mark image as validated.
|
// Mark image as validated.
|
||||||
image.flags &= ~ImageFlagBits::CpuModified;
|
image.flags &= ~ImageFlagBits::CpuModified;
|
||||||
|
|
||||||
|
@ -269,8 +269,10 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
scheduler.EndRendering();
|
auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler;
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
sched_ptr->EndRendering();
|
||||||
|
|
||||||
|
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
|
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
|
||||||
|
|
||||||
const VAddr image_addr = image.info.guest_address;
|
const VAddr image_addr = image.info.guest_address;
|
||||||
|
|
|
@ -59,17 +59,17 @@ public:
|
||||||
const ImageViewInfo& view_info);
|
const ImageViewInfo& view_info);
|
||||||
|
|
||||||
/// Updates image contents if it was modified by CPU.
|
/// Updates image contents if it was modified by CPU.
|
||||||
void UpdateImage(ImageId image_id) {
|
void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
if (False(image.flags & ImageFlagBits::CpuModified)) {
|
if (False(image.flags & ImageFlagBits::CpuModified)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
RefreshImage(image);
|
RefreshImage(image, custom_scheduler);
|
||||||
TrackImage(image, image_id);
|
TrackImage(image, image_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reuploads image contents.
|
/// Reuploads image contents.
|
||||||
void RefreshImage(Image& image);
|
void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr);
|
||||||
|
|
||||||
/// Retrieves the sampler that matches the provided S# descriptor.
|
/// Retrieves the sampler that matches the provided S# descriptor.
|
||||||
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);
|
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);
|
||||||
|
|
Loading…
Reference in New Issue