From e89b2d1cddb7129f38ac6178da331adb9e900a33 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 10 Jun 2024 17:20:49 +0200 Subject: [PATCH 1/5] Added Tracy profiler --- .gitmodules | 3 +++ .reuse/dep5 | 1 + CMakeLists.txt | 2 +- externals/CMakeLists.txt | 8 ++++++++ externals/tracy | 1 + src/common/debug.h | 2 ++ 6 files changed, 16 insertions(+), 1 deletion(-) create mode 160000 externals/tracy diff --git a/.gitmodules b/.gitmodules index 7ac7ad42..3284ecce 100644 --- a/.gitmodules +++ b/.gitmodules @@ -55,3 +55,6 @@ [submodule "externals/xxhash"] path = externals/xxhash url = https://github.com/Cyan4973/xxHash.git +[submodule "externals/tracy"] + path = externals/tracy + url = https://github.com/shadps4-emu/tracy diff --git a/.reuse/dep5 b/.reuse/dep5 index 283c680b..ed58eb79 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -34,5 +34,6 @@ Files: CMakeSettings.json src/shadps4.rc src/shadps4.qrc externals/stb_image.h + externals/tracy/* Copyright: shadPS4 Emulator Project License: GPL-2.0-or-later diff --git a/CMakeLists.txt b/CMakeLists.txt index 7bb549d2..a9f47acc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -500,7 +500,7 @@ endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak Tracy::TracyClient) target_link_libraries(shadps4 PRIVATE discord-rpc boost vma sirit vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared) if (NOT ENABLE_QT_GUI) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index b2d348b7..4e60988c 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -93,3 +93,11 @@ add_subdirectory(sirit EXCLUDE_FROM_ALL) if (WIN32) target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") endif() + +# Tracy +option(TRACY_ENABLE "" ON) +option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache 
exceptions will be treated as a crash +option(TRACY_ON_DEMAND "" ON) +option(TRACY_NO_FRAME_IMAGE "" ON) +option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling +add_subdirectory(tracy EXCLUDE_FROM_ALL) diff --git a/externals/tracy b/externals/tracy new file mode 160000 index 00000000..ef96964f --- /dev/null +++ b/externals/tracy @@ -0,0 +1 @@ +Subproject commit ef96964f71885e9ff177253ce0465569787e4a4c diff --git a/src/common/debug.h b/src/common/debug.h index e1c898a3..f89590aa 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -10,3 +10,5 @@ #else #error What the fuck is this compiler #endif + +#include From 04b1226e9c1aabc58f5643026d848bf10547801d Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 11 Jun 2024 12:14:33 +0200 Subject: [PATCH 2/5] tracy: basic markup and project palette --- src/common/debug.h | 37 +++++++++++++++++++ src/common/logging/backend.cpp | 19 ++++++++++ src/core/memory.cpp | 3 ++ src/emulator.cpp | 2 + src/video_core/amdgpu/liverpool.cpp | 23 ++++++++++++ .../renderer_vulkan/vk_rasterizer.cpp | 5 +++ 6 files changed, 89 insertions(+) diff --git a/src/common/debug.h b/src/common/debug.h index f89590aa..98f6d3eb 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -12,3 +12,40 @@ #endif #include + +static inline bool IsProfilerConnected() { + return tracy::GetProfiler().IsConnected(); +} + +#define CUSTOM_LOCK(type, varname) \ + tracy::LockableCtx varname { \ + []() -> const tracy::SourceLocationData* { \ + static constexpr tracy::SourceLocationData srcloc{nullptr, #type " " #varname, \ + TracyFile, TracyLine, 0}; \ + return &srcloc; \ + }() \ + } + +#define TRACK_ALLOC(ptr, size, pool) TracyAllocN(std::bit_cast(ptr), (size), (pool)) +#define TRACK_FREE(ptr, pool) TracyFreeN(std::bit_cast(ptr), (pool)) + +enum MarkersPallete : int { + EmulatorMarkerColor = 0x264653, + RendererMarkerColor = 0x2a9d8f, + HleMarkerColor = 0xe9c46a, + Reserved0 = 0xf4a261, + Reserved1 = 0xe76f51, +}; + +#define EMULATOR_TRACE
ZoneScopedC(EmulatorMarkerColor) +#define RENDERER_TRACE ZoneScopedC(RendererMarkerColor) +#define HLE_TRACE ZoneScopedC(HleMarkerColor) + +#define TRACE_WARN(msg) \ + [](const auto& msg) { TracyMessageC(msg.c_str(), msg.size(), tracy::Color::DarkOrange); }(msg); +#define TRACE_ERROR(msg) \ + [](const auto& msg) { TracyMessageC(msg.c_str(), msg.size(), tracy::Color::Red); }(msg) +#define TRACE_CRIT(msg) \ + [](const auto& msg) { TracyMessageC(msg.c_str(), msg.size(), tracy::Color::HotPink); }(msg) + +#define FRAME_END FrameMark diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 0fd344b5..460b7376 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -13,6 +13,7 @@ #include "common/bounded_threadsafe_queue.h" #include "common/config.h" +#include "common/debug.h" #include "common/io_file.h" #include "common/logging/backend.h" #include "common/logging/log.h" @@ -167,6 +168,24 @@ public: void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, std::string message) { + // Propagate important log messages to the profiler + if (IsProfilerConnected()) { + const auto& msg_str = std::format("[{}] {}", GetLogClassName(log_class), message); + switch (log_level) { + case Level::Warning: + TRACE_WARN(msg_str); + break; + case Level::Error: + TRACE_ERROR(msg_str); + break; + case Level::Critical: + TRACE_CRIT(msg_str); + break; + default: + break; + } + } + if (!filter.CheckMessage(log_class, log_level)) { return; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a6068053..5029f82c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -3,6 +3,7 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/debug.h" #include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/memory_management.h" @@ -123,6 +124,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, 
size_t size, M // Perform the mapping. *out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec); + TRACK_ALLOC(*out_addr, size, "VMEM"); return ORBIS_OK; } @@ -149,6 +151,7 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { // Unmap the memory region. impl.Unmap(virtual_addr, size, phys_addr); + TRACK_FREE(virtual_addr, "VMEM"); } int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) { diff --git a/src/emulator.cpp b/src/emulator.cpp index 793d996a..c5facd19 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -8,6 +8,7 @@ #include #include #include "common/config.h" +#include "common/debug.h" #include "common/logging/backend.h" #include "common/path_util.h" #include "common/singleton.h" @@ -121,6 +122,7 @@ void Emulator::Run(const std::filesystem::path& file) { window.waitEvent(); Libraries::VideoOut::Flip(FlipPeriod); Libraries::VideoOut::Vblank(); + FRAME_END; } std::exit(0); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 7f275e8c..e0cf86aa 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/debug.h" #include "common/thread.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -9,6 +10,10 @@ namespace AmdGpu { +static const char* dcb_task_name{"DCB_TASK"}; +static const char* ccb_task_name{"CCB_TASK"}; +static const char* asc_task_name{"ACB_TASK"}; + std::array Liverpool::ConstantEngine::constants_heap; Liverpool::Liverpool() { @@ -69,12 +74,16 @@ void Liverpool::Process(std::stop_token stoken) { } void Liverpool::WaitGpuIdle() { + RENDERER_TRACE; + while (const auto old = num_submits.load()) { num_submits.wait(old); } } Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { + TracyFiberEnter(ccb_task_name); + while (!ccb.empty()) { const auto* header = 
reinterpret_cast(ccb.data()); const u32 type = header->type; @@ -109,7 +118,9 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { case PM4ItOpcode::WaitOnDeCounterDiff: { const auto diff = it_body[0]; while ((cblock.de_count - cblock.ce_count) >= diff) { + TracyFiberLeave; co_yield {}; + TracyFiberEnter(ccb_task_name); } break; } @@ -120,9 +131,13 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { } ccb = ccb.subspan(header->type3.NumWords() + 1); } + + TracyFiberLeave; } Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span ccb) { + TracyFiberEnter(dcb_task_name); + cblock.Reset(); // TODO: potentially, ASCs also can depend on CE and in this case the @@ -132,7 +147,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::span(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); while (!wait_reg_mem->Test()) { + TracyFiberLeave; co_yield {}; + TracyFiberEnter(dcb_task_name); } break; } @@ -340,7 +359,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::span acb) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 291d38fd..abb6d328 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "common/debug.h" #include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -33,6 +34,8 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, Rasterizer::~Rasterizer() = default; void Rasterizer::Draw(bool is_indexed, u32 index_offset) { + RENDERER_TRACE; + const auto cmdbuf = scheduler.CommandBuffer(); const auto& regs = liverpool->regs; const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset); @@ -104,6 +107,8 @@ void Rasterizer::Draw(bool 
is_indexed, u32 index_offset) { } void Rasterizer::DispatchDirect() { + RENDERER_TRACE; + const auto cmdbuf = scheduler.CommandBuffer(); const auto& cs_program = liverpool->regs.cs_program; const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); From d7565dec5724be81638950217b58d9ae609db542 Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 11 Jun 2024 21:52:48 +0200 Subject: [PATCH 3/5] tracy: added Vulkan GPU profiling --- src/common/debug.h | 5 +- .../renderer_vulkan/renderer_vulkan.cpp | 109 +++++++++--------- .../renderer_vulkan/vk_instance.cpp | 23 ++++ src/video_core/renderer_vulkan/vk_instance.h | 8 ++ .../renderer_vulkan/vk_scheduler.cpp | 15 ++- src/video_core/renderer_vulkan/vk_scheduler.h | 2 + 6 files changed, 107 insertions(+), 55 deletions(-) diff --git a/src/common/debug.h b/src/common/debug.h index 98f6d3eb..ea1dff7d 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -33,7 +33,7 @@ enum MarkersPallete : int { EmulatorMarkerColor = 0x264653, RendererMarkerColor = 0x2a9d8f, HleMarkerColor = 0xe9c46a, - Reserved0 = 0xf4a261, + GpuMarkerColor = 0xf4a261, Reserved1 = 0xe76f51, }; @@ -48,4 +48,7 @@ enum MarkersPallete : int { #define TRACE_CRIT(msg) \ [](const auto& msg) { TracyMessageC(msg.c_str(), msg.size(), tracy::Color::HotPink); }(msg) +#define GPU_SCOPE_LOCATION(name, color) \ + tracy::SourceLocationData{name, TracyFunction, TracyFile, (uint32_t)TracyLine, color}; + #define FRAME_END FrameMark diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 572316af..ecce9bb5 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "common/debug.h" #include "common/singleton.h" #include "core/file_format/splash.h" #include "core/libraries/system/systemservice.h" @@ -270,14 +271,50 @@ void 
RendererVulkan::Present(Frame* frame) { }; const vk::CommandBuffer cmdbuf = frame->cmdbuf; cmdbuf.begin(begin_info); + { + TracyVkZoneC(instance.GetProfilerContext(), cmdbuf, "Host frame", + MarkersPallete::GpuMarkerColor); - const vk::Extent2D extent = swapchain.GetExtent(); - const std::array pre_barriers{ - vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eNone, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - .oldLayout = vk::ImageLayout::eUndefined, - .newLayout = vk::ImageLayout::eTransferDstOptimal, + const vk::Extent2D extent = swapchain.GetExtent(); + const std::array pre_barriers{ + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = frame->image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const vk::ImageMemoryBarrier post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::ePresentSrcKHR, .srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = swapchain_image, @@ -288,54 +325,22 @@ void RendererVulkan::Present(Frame* frame) { .baseArrayLayer = 0, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, - }, - vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .oldLayout = vk::ImageLayout::eGeneral, - .newLayout = vk::ImageLayout::eTransferSrcOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = frame->image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }, - }; - const vk::ImageMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = vk::ImageLayout::ePresentSrcKHR, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = swapchain_image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; + }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, {}, pre_barriers); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); - cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, - vk::ImageLayout::eTransferDstOptimal, - MakeImageBlit(frame->width, frame->height, extent.width, extent.height), - vk::Filter::eLinear); - - 
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, + vk::ImageLayout::eTransferDstOptimal, + MakeImageBlit(frame->width, frame->height, extent.width, extent.height), + vk::Filter::eLinear); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + } + TracyVkCollect(instance.GetProfilerContext(), cmdbuf); cmdbuf.end(); static constexpr std::array wait_stage_masks = { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 6d19452d..06a47675 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -160,6 +160,7 @@ bool Instance::CreateDevice() { // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); + const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -212,6 +213,7 @@ bool Instance::CreateDevice() { }, vk::PhysicalDeviceVulkan12Features{ .scalarBlockLayout = true, + .hostQueryReset = true, .timelineSemaphore = true, }, vk::PhysicalDeviceVulkan13Features{ @@ -251,6 +253,27 @@ bool Instance::CreateDevice() { graphics_queue = device->getQueue(queue_family_index, 0); present_queue = device->getQueue(queue_family_index, 0); + if (calibrated_timestamps) { + const auto& time_domains = physical_device.getCalibrateableTimeDomainsEXT(); +#if _WIN64 + const bool has_host_time_domain = + 
std::find(time_domains.cbegin(), time_domains.cend(), + vk::TimeDomainEXT::eQueryPerformanceCounter) != time_domains.cend(); +#else + const bool has_host_time_domain = + std::find(time_domains.cbegin(), time_domains.cend(), + vk::TimeDomainEXT::eClockMonotonicRaw) != time_domains.cend(); +#endif + if (has_host_time_domain) { + static constexpr std::string_view context_name{"vk_rasterizer"}; + profiler_context = + TracyVkContextHostCalibrated(*instance, physical_device, *device, + VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr, + VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr); + TracyVkContextName(profiler_context, context_name.data(), context_name.size()); + } + } + CreateAllocator(); return true; } diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 797eb886..f8e3c2e9 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -7,6 +7,9 @@ #include "video_core/renderer_vulkan/vk_platform.h" +#define TRACY_VK_USE_SYMBOL_TABLE +#include + namespace Frontend { class WindowSDL; } @@ -67,6 +70,10 @@ public: return present_queue; } + TracyVkCtx GetProfilerContext() const { + return profiler_context; + } + /// Returns true when a known debugging tool is attached. 
bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -208,6 +215,7 @@ private: vk::Queue graphics_queue; std::vector physical_devices; std::vector available_extensions; + TracyVkCtx profiler_context{}; u32 queue_family_index{0}; bool image_view_reinterpretation{true}; bool timeline_semaphores{}; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 8e265f72..54cd6974 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -2,17 +2,21 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/debug.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { Scheduler::Scheduler(const Instance& instance) - : master_semaphore{instance}, command_pool{instance, &master_semaphore} { + : instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} { + profiler_scope = reinterpret_cast(std::malloc(sizeof(tracy::VkCtxScope))); AllocateWorkerCommandBuffers(); } -Scheduler::~Scheduler() = default; +Scheduler::~Scheduler() { + std::free(profiler_scope); +} void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) { // When flushing, we only send data to the worker thread; no waiting is necessary. 
@@ -41,11 +45,18 @@ void Scheduler::AllocateWorkerCommandBuffers() { current_cmdbuf = command_pool.Commit(); current_cmdbuf.begin(begin_info); + + static const auto scope_loc = GPU_SCOPE_LOCATION("Guest Frame", MarkersPallete::GpuMarkerColor); + new (profiler_scope) + tracy::VkCtxScope{instance.GetProfilerContext(), &scope_loc, current_cmdbuf, true}; } void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { const u64 signal_value = master_semaphore.NextTick(); + profiler_scope->~VkCtxScope(); + TracyVkCollect(instance.GetProfilerContext(), current_cmdbuf); + std::scoped_lock lk{submit_mutex}; master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value); master_semaphore.Refresh(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index fde48824..284c288a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -54,10 +54,12 @@ private: void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); private: + const Instance& instance; MasterSemaphore master_semaphore; CommandPool command_pool; vk::CommandBuffer current_cmdbuf; std::condition_variable_any event_cv; + tracy::VkCtxScope* profiler_scope{}; }; } // namespace Vulkan From 955752a24b371fdcdf52e75fee8e3bfe16187489 Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 11 Jun 2024 21:58:35 +0200 Subject: [PATCH 4/5] tracy: submodule update --- externals/tracy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/tracy b/externals/tracy index ef96964f..c6d779d7 160000 --- a/externals/tracy +++ b/externals/tracy @@ -1 +1 @@ -Subproject commit ef96964f71885e9ff177253ce0465569787e4a4c +Subproject commit c6d779d78508514102fbe1b8eb28bda10d95bb2a From 64569ff7375652a38771c01051e85d4dd71d1079 Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 11 Jun 2024 22:57:37 +0200 Subject: [PATCH 5/5] tracy: 
guards for missing vk profiler context --- .../renderer_vulkan/renderer_vulkan.cpp | 10 +++++++--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 16 +++++++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ecce9bb5..87fb447b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -272,8 +272,9 @@ void RendererVulkan::Present(Frame* frame) { const vk::CommandBuffer cmdbuf = frame->cmdbuf; cmdbuf.begin(begin_info); { - TracyVkZoneC(instance.GetProfilerContext(), cmdbuf, "Host frame", - MarkersPallete::GpuMarkerColor); + auto* profiler_ctx = instance.GetProfilerContext(); + TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame", + MarkersPallete::GpuMarkerColor, profiler_ctx != nullptr); const vk::Extent2D extent = swapchain.GetExtent(); const std::array pre_barriers{ @@ -339,8 +340,11 @@ void RendererVulkan::Present(Frame* frame) { cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + + if (profiler_ctx) { + TracyVkCollect(profiler_ctx, cmdbuf); + } } - TracyVkCollect(instance.GetProfilerContext(), cmdbuf); cmdbuf.end(); static constexpr std::array wait_stage_masks = { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 54cd6974..7ed311f7 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -46,16 +46,22 @@ void Scheduler::AllocateWorkerCommandBuffers() { current_cmdbuf = command_pool.Commit(); current_cmdbuf.begin(begin_info); - static const auto scope_loc = GPU_SCOPE_LOCATION("Guest Frame", MarkersPallete::GpuMarkerColor); - new (profiler_scope) - tracy::VkCtxScope{instance.GetProfilerContext(), &scope_loc, 
current_cmdbuf, true}; + auto* profiler_ctx = instance.GetProfilerContext(); + if (profiler_ctx) { + static const auto scope_loc = + GPU_SCOPE_LOCATION("Guest Frame", MarkersPallete::GpuMarkerColor); + new (profiler_scope) tracy::VkCtxScope{profiler_ctx, &scope_loc, current_cmdbuf, true}; + } } void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { const u64 signal_value = master_semaphore.NextTick(); - profiler_scope->~VkCtxScope(); - TracyVkCollect(instance.GetProfilerContext(), current_cmdbuf); + auto* profiler_ctx = instance.GetProfilerContext(); + if (profiler_ctx) { + profiler_scope->~VkCtxScope(); + TracyVkCollect(profiler_ctx, current_cmdbuf); + } std::scoped_lock lk{submit_mutex}; master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);