diff --git a/src/common/config.cpp b/src/common/config.cpp
index 9f55cbd4..04d67d44 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -20,6 +20,7 @@ static std::string userName = "shadPS4";
 static bool isDebugDump = false;
 static bool isShowSplash = false;
 static bool isNullGpu = false;
+static bool shouldCopyGPUBuffers = false;
 static bool shouldDumpShaders = false;
 static bool shouldDumpPM4 = false;
 static u32 vblankDivider = 1;
@@ -93,6 +94,10 @@ bool nullGpu() {
     return isNullGpu;
 }
 
+bool copyGPUCmdBuffers() {
+    return shouldCopyGPUBuffers;
+}
+
 bool dumpShaders() {
     return shouldDumpShaders;
 }
@@ -149,6 +154,10 @@ void setNullGpu(bool enable) {
     isNullGpu = enable;
 }
 
+void setCopyGPUCmdBuffers(bool enable) {
+    shouldCopyGPUBuffers = enable;
+}
+
 void setDumpShaders(bool enable) {
     shouldDumpShaders = enable;
 }
@@ -336,6 +345,7 @@ void load(const std::filesystem::path& path) {
         screenWidth = toml::find_or(gpu, "screenWidth", screenWidth);
         screenHeight = toml::find_or(gpu, "screenHeight", screenHeight);
         isNullGpu = toml::find_or(gpu, "nullGpu", false);
+        shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", false);
         shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false);
         shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false);
         vblankDivider = toml::find_or(gpu, "vblankDivider", 1);
@@ -414,6 +424,7 @@ void save(const std::filesystem::path& path) {
     data["GPU"]["screenWidth"] = screenWidth;
     data["GPU"]["screenHeight"] = screenHeight;
     data["GPU"]["nullGpu"] = isNullGpu;
+    data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
     data["GPU"]["dumpShaders"] = shouldDumpShaders;
     data["GPU"]["dumpPM4"] = shouldDumpPM4;
     data["GPU"]["vblankDivider"] = vblankDivider;
diff --git a/src/common/config.h b/src/common/config.h
index 554515a4..f1347b07 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -24,6 +24,7 @@ s32 getGpuId();
 bool debugDump();
 bool showSplash();
 bool nullGpu();
+bool copyGPUCmdBuffers();
 bool dumpShaders();
 bool dumpPM4();
 bool isRdocEnabled();
@@ -33,6 +34,7 @@ u32 vblankDiv();
 void setDebugDump(bool enable);
 void setShowSplash(bool enable);
 void setNullGpu(bool enable);
+void setCopyGPUCmdBuffers(bool enable);
 void setDumpShaders(bool enable);
 void setDumpPM4(bool enable);
 void setVblankDiv(u32 value);
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 5b3db603..cec3b06b 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -5,6 +5,7 @@
 #include "common/debug.h"
 #include "common/polyfill_thread.h"
 #include "common/thread.h"
+#include "common/config.h"
 #include "core/libraries/videoout/driver.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/amdgpu/pm4_cmds.h"
@@ -568,9 +569,52 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) {
     TracyFiberLeave;
 }
 
+/// Copies the submitted dcb and ccb command buffers into the graphics queue's own
+/// staging vectors and rebinds both spans to the copies, so the caller continues with
+/// the copied data instead of the memory it was originally handed.
+void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb) {
+    auto& queue = mapped_queues[GfxQueueId];
+
+    // Each staging buffer is sized from its own source span. Shrinking never
+    // reallocates; NOTE(review): growing past capacity does, which would invalidate
+    // spans returned by earlier calls - confirm none are still in use at that point.
+    queue.dcb_buffer.resize(queue.dcb_buffer_offset + dcb.size());
+    queue.ccb_buffer.resize(queue.ccb_buffer_offset + ccb.size());
+
+    u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset;
+    u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset;
+    if (!dcb.empty()) {
+        std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(),
+                    dcb.size_bytes());
+        queue.dcb_buffer_offset += dcb.size();
+    }
+
+    if (!ccb.empty()) {
+        std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(),
+                    ccb.size_bytes());
+        // Advance by the ccb length (not dcb) so the rebound span below covers exactly
+        // the elements that were just copied.
+        queue.ccb_buffer_offset += ccb.size();
+    }
+
+    if (!queue.dcb_buffer.empty()) {
+        dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset,
+                        queue.dcb_buffer.begin() + queue.dcb_buffer_offset};
+    }
+
+    if (!queue.ccb_buffer.empty()) {
+        ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset,
+                        queue.ccb_buffer.begin() + queue.ccb_buffer_offset};
+    }
+}
+
 void Liverpool::SubmitGfx(std::span dcb, std::span ccb) {
     auto& queue = mapped_queues[GfxQueueId];
 
+    if (Config::copyGPUCmdBuffers()) {
+        CopyCmdBuffers(dcb, ccb);
+    }
+
     auto task = ProcessGraphics(dcb, ccb);
     {
         std::scoped_lock lock{queue.m_access};
diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h
index 2806f330..a4e61c92 100644
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 
 #include "common/assert.h"
 #include "common/bit_field.h"
@@ -1047,6 +1048,8 @@ public:
 
     void SubmitDone() noexcept {
         std::scoped_lock lk{submit_mutex};
+        mapped_queues[GfxQueueId].ccb_buffer_offset = 0;
+        mapped_queues[GfxQueueId].dcb_buffer_offset = 0;
         submit_done = true;
         submit_cv.notify_one();
     }
@@ -1108,6 +1111,7 @@ private:
         Handle handle;
     };
 
+    void CopyCmdBuffers(std::span& dcb, std::span& ccb);
     Task ProcessGraphics(std::span dcb, std::span ccb);
     Task ProcessCeUpdate(std::span ccb);
     Task ProcessCompute(std::span acb, int vqid);
@@ -1116,6 +1120,10 @@ private:
 
     struct GpuQueue {
         std::mutex m_access{};
+        std::atomic_uint32_t dcb_buffer_offset{};
+        std::atomic_uint32_t ccb_buffer_offset{};
+        std::vector dcb_buffer;
+        std::vector ccb_buffer;
         std::queue submits{};
         ComputeProgram cs_state{};
     };