From 659e7a467549ebca374acc859c8a0e3d47c0e0b6 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Tue, 27 Aug 2024 23:16:14 +0200 Subject: [PATCH 1/7] video_core: Added copyGPUCmdBuffers option --- src/common/config.cpp | 11 +++++++++ src/common/config.h | 2 ++ src/video_core/amdgpu/liverpool.cpp | 37 +++++++++++++++++++++++++++++ src/video_core/amdgpu/liverpool.h | 8 +++++++ 4 files changed, 58 insertions(+) diff --git a/src/common/config.cpp b/src/common/config.cpp index 9f55cbd4..04d67d44 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -20,6 +20,7 @@ static std::string userName = "shadPS4"; static bool isDebugDump = false; static bool isShowSplash = false; static bool isNullGpu = false; +static bool shouldCopyGPUBuffers = false; static bool shouldDumpShaders = false; static bool shouldDumpPM4 = false; static u32 vblankDivider = 1; @@ -93,6 +94,10 @@ bool nullGpu() { return isNullGpu; } +bool copyGPUCmdBuffers() { + return shouldCopyGPUBuffers; +} + bool dumpShaders() { return shouldDumpShaders; } @@ -149,6 +154,10 @@ void setNullGpu(bool enable) { isNullGpu = enable; } +void setCopyGPUCmdBuffers(bool enable) { + shouldCopyGPUBuffers = enable; +} + void setDumpShaders(bool enable) { shouldDumpShaders = enable; } @@ -336,6 +345,7 @@ void load(const std::filesystem::path& path) { screenWidth = toml::find_or(gpu, "screenWidth", screenWidth); screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); isNullGpu = toml::find_or(gpu, "nullGpu", false); + shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", false); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false); vblankDivider = toml::find_or(gpu, "vblankDivider", 1); @@ -414,6 +424,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["nullGpu"] = isNullGpu; + data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers; 
data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["dumpPM4"] = shouldDumpPM4; data["GPU"]["vblankDivider"] = vblankDivider; diff --git a/src/common/config.h b/src/common/config.h index 554515a4..f1347b07 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -24,6 +24,7 @@ s32 getGpuId(); bool debugDump(); bool showSplash(); bool nullGpu(); +bool copyGPUCmdBuffers(); bool dumpShaders(); bool dumpPM4(); bool isRdocEnabled(); @@ -33,6 +34,7 @@ u32 vblankDiv(); void setDebugDump(bool enable); void setShowSplash(bool enable); void setNullGpu(bool enable); +void setCopyGPUCmdBuffers(bool enable); void setDumpShaders(bool enable); void setDumpPM4(bool enable); void setVblankDiv(u32 value); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 5b3db603..cec3b06b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -5,6 +5,7 @@ #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" +#include "common/config.h" #include "core/libraries/videoout/driver.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -568,9 +569,45 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { TracyFiberLeave; } +void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb) { + auto& queue = mapped_queues[GfxQueueId]; + + // This is fine because resize doesn't reallocate the buffer on shrink + queue.dcb_buffer.resize(queue.dcb_buffer_offset + dcb.size()); + queue.ccb_buffer.resize(queue.ccb_buffer_offset + dcb.size()); + + u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; + u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; + if (!dcb.empty()) { + std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(), + dcb.size_bytes()); + queue.dcb_buffer_offset += dcb.size(); + } + + if (!ccb.empty()) { + std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(), + ccb.size_bytes()); + 
queue.ccb_buffer_offset += dcb.size(); + } + + if (!queue.dcb_buffer.empty()) { + dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, + queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; + } + + if (!queue.ccb_buffer.empty()) { + ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, + queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; + } +} + void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; + if (Config::copyGPUCmdBuffers()) { + CopyCmdBuffers(dcb, ccb); + } + auto task = ProcessGraphics(dcb, ccb); { std::scoped_lock lock{queue.m_access}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 2806f330..a4e61c92 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -11,6 +11,7 @@ #include #include #include +#include #include "common/assert.h" #include "common/bit_field.h" @@ -1047,6 +1048,8 @@ public: void SubmitDone() noexcept { std::scoped_lock lk{submit_mutex}; + mapped_queues[GfxQueueId].ccb_buffer_offset = 0; + mapped_queues[GfxQueueId].dcb_buffer_offset = 0; submit_done = true; submit_cv.notify_one(); } @@ -1108,6 +1111,7 @@ private: Handle handle; }; + void CopyCmdBuffers(std::span& dcb, std::span& ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); Task ProcessCompute(std::span acb, int vqid); @@ -1116,6 +1120,10 @@ private: struct GpuQueue { std::mutex m_access{}; + std::atomic_uint32_t dcb_buffer_offset; + std::atomic_uint32_t ccb_buffer_offset; + std::vector dcb_buffer; + std::vector ccb_buffer; std::queue submits{}; ComputeProgram cs_state{}; }; From 595b845df0f6776cb44201217a7cdc2f7cb679d3 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Tue, 27 Aug 2024 23:31:04 +0200 Subject: [PATCH 2/7] clang-format fix --- src/video_core/amdgpu/liverpool.cpp | 4 ++-- src/video_core/amdgpu/liverpool.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index cec3b06b..4b8a8315 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -593,8 +593,8 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& if (!queue.dcb_buffer.empty()) { dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; - } - + } + if (!queue.ccb_buffer.empty()) { ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index a4e61c92..c12c8fdb 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -10,8 +10,8 @@ #include #include #include -#include #include +#include #include "common/assert.h" #include "common/bit_field.h" From 3d46a5d4920fdf5ab04fb20e6f86bde87235d77c Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Tue, 27 Aug 2024 23:33:24 +0200 Subject: [PATCH 3/7] Do not shrink buffer's size on submit --- src/video_core/amdgpu/liverpool.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 4b8a8315..93099266 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -573,8 +573,10 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& auto& queue = mapped_queues[GfxQueueId]; // This is fine because resize doesn't reallocate the buffer on shrink - queue.dcb_buffer.resize(queue.dcb_buffer_offset + dcb.size()); - queue.ccb_buffer.resize(queue.ccb_buffer_offset + dcb.size()); + queue.dcb_buffer.resize( + std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size())); + queue.ccb_buffer.resize( + std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + dcb.size())); u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; u32 
prev_ccb_buffer_offset = queue.ccb_buffer_offset; From 3842993a4310e5f6c392f0898c795a50e5a731a2 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Wed, 28 Aug 2024 00:21:12 +0200 Subject: [PATCH 4/7] Use input dcb and ccb instead of copy --- src/video_core/amdgpu/liverpool.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 93099266..3db9ff0d 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -572,7 +572,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb) { auto& queue = mapped_queues[GfxQueueId]; - // This is fine because resize doesn't reallocate the buffer on shrink queue.dcb_buffer.resize( std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size())); queue.ccb_buffer.resize( @@ -584,20 +583,14 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(), dcb.size_bytes()); queue.dcb_buffer_offset += dcb.size(); + dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, + queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; } if (!ccb.empty()) { std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(), ccb.size_bytes()); queue.ccb_buffer_offset += dcb.size(); - } - - if (!queue.dcb_buffer.empty()) { - dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, - queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; - } - - if (!queue.ccb_buffer.empty()) { ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; } From 1a02efbd156ff7b07bfa306cd94c39862cba8d56 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Wed, 28 Aug 2024 05:42:48 +0200 Subject: [PATCH 5/7] clang-format style fix --- src/video_core/amdgpu/liverpool.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 3db9ff0d..6e49aec4 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -2,10 +2,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" -#include "common/config.h" #include "core/libraries/videoout/driver.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" From 87ccfdfbbdb7b98ba1169174096baffcd1d93729 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Wed, 28 Aug 2024 09:42:31 +0200 Subject: [PATCH 6/7] Fixed typo in function --- src/video_core/amdgpu/liverpool.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 6e49aec4..931b5ea8 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -575,7 +575,7 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& queue.dcb_buffer.resize( std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size())); queue.ccb_buffer.resize( - std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + dcb.size())); + std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + ccb.size())); u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; @@ -590,7 +590,7 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& if (!ccb.empty()) { std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(), ccb.size_bytes()); - queue.ccb_buffer_offset += dcb.size(); + queue.ccb_buffer_offset += ccb.size(); ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; } From dfb30ea955279e82f3c86cf9f0f8a0b59c2dcdfd Mon Sep 17 00:00:00 2001 From: 
Anton Kovalev Date: Wed, 28 Aug 2024 11:24:15 +0200 Subject: [PATCH 7/7] Use pair of spans instead of references in copy command buffers function --- src/video_core/amdgpu/liverpool.cpp | 7 +++++-- src/video_core/amdgpu/liverpool.h | 7 ++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 931b5ea8..a3e64b7c 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -569,7 +569,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { TracyFiberLeave; } -void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb) { +std::pair, std::span> Liverpool::CopyCmdBuffers( + std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; queue.dcb_buffer.resize( @@ -594,13 +595,15 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; } + + return std::make_pair(dcb, ccb); } void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; if (Config::copyGPUCmdBuffers()) { - CopyCmdBuffers(dcb, ccb); + std::tie(dcb, ccb) = CopyCmdBuffers(dcb, ccb); } auto task = ProcessGraphics(dcb, ccb); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c12c8fdb..14284bbc 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1111,7 +1111,8 @@ private: Handle handle; }; - void CopyCmdBuffers(std::span& dcb, std::span& ccb); + std::pair, std::span> CopyCmdBuffers(std::span dcb, + std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); Task ProcessCompute(std::span acb, int vqid); @@ -1120,8 +1121,8 @@ private: struct GpuQueue { std::mutex m_access{}; - std::atomic_uint32_t dcb_buffer_offset; - std::atomic_uint32_t ccb_buffer_offset; + std::atomic 
dcb_buffer_offset; + std::atomic ccb_buffer_offset; std::vector dcb_buffer; std::vector ccb_buffer; std::queue submits{};