From 0d6edaa0a0282571b07d5e518e72231a2d863927 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 28 Jul 2024 16:54:09 +0300 Subject: [PATCH] Move presentation to separate thread/improve sync (#303) * video_out: Move presentation to separate thread * liverpool: Better sync for CPU flips * driver: Make flip blocking * videoout: Proper flip rate and vblank management * config: Add vblank divider option * clang format * videoout: added `sceVideoOutWaitVblank` * clang format * vk_scheduler: Silly merge conflict * externals: Add renderdoc API * clang format * reuse * rdoc: manual capture trigger * clang fmt --------- Co-authored-by: psucien <168137814+psucien@users.noreply.github.com> --- .reuse/dep5 | 4 + CMakeLists.txt | 5 +- externals/CMakeLists.txt | 7 + externals/renderdoc/renderdoc_app.h | 741 ++++++++++++++++++ src/common/config.cpp | 18 +- src/common/config.h | 2 + src/common/path_util.cpp | 1 + src/common/path_util.h | 2 + src/core/libraries/gnmdriver/gnmdriver.cpp | 4 +- src/core/libraries/kernel/event_queue.cpp | 4 +- src/core/libraries/libs.cpp | 2 +- src/core/libraries/videoout/driver.cpp | 103 ++- src/core/libraries/videoout/driver.h | 38 +- src/core/libraries/videoout/video_out.cpp | 22 +- src/core/libraries/videoout/video_out.h | 6 +- src/emulator.cpp | 27 +- src/sdl_window.cpp | 8 +- src/video_core/amdgpu/liverpool.cpp | 35 +- src/video_core/amdgpu/liverpool.h | 22 + src/video_core/amdgpu/pm4_cmds.h | 14 +- src/video_core/renderdoc.cpp | 120 +++ src/video_core/renderdoc.h | 25 + .../renderer_vulkan/renderer_vulkan.cpp | 98 +-- .../renderer_vulkan/renderer_vulkan.h | 27 +- .../renderer_vulkan/vk_master_semaphore.cpp | 44 -- .../renderer_vulkan/vk_master_semaphore.h | 4 - .../renderer_vulkan/vk_rasterizer.cpp | 7 + .../renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_scheduler.cpp | 54 +- src/video_core/renderer_vulkan/vk_scheduler.h | 33 +- .../renderer_vulkan/vk_swapchain.cpp | 2 +- 
src/video_core/texture_cache/tile_manager.cpp | 2 +- 32 files changed, 1259 insertions(+), 224 deletions(-) create mode 100644 externals/renderdoc/renderdoc_app.h create mode 100644 src/video_core/renderdoc.cpp create mode 100644 src/video_core/renderdoc.h diff --git a/.reuse/dep5 b/.reuse/dep5 index 1dad5014..c467a164 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -42,3 +42,7 @@ Files: CMakeSettings.json src/shadps4.rc Copyright: shadPS4 Emulator Project License: GPL-2.0-or-later + +Files: externals/renderdoc/* +Copyright: 2019-2024 Baldur Karlsson +License: MIT diff --git a/CMakeLists.txt b/CMakeLists.txt index 880d1cf5..08cc4103 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,7 @@ find_package(xbyak 7.07 CONFIG) find_package(xxHash 0.8.2 MODULE) find_package(zlib-ng 2.2.0 MODULE) find_package(Zydis 4.1.0 CONFIG) +find_package(RenderDoc MODULE) if (APPLE) find_package(date 3.0.1 CONFIG) @@ -484,6 +485,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/texture_cache/tile_manager.cpp src/video_core/texture_cache/tile_manager.h src/video_core/texture_cache/types.h + src/video_core/renderdoc.cpp + src/video_core/renderdoc.h ) set(INPUT src/input/controller.cpp @@ -559,7 +562,7 @@ endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API) target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3) if (APPLE) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 7fca7b54..9ebdd878 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -74,6 +74,13 @@ if (NOT TARGET GPUOpen::VulkanMemoryAllocator) add_subdirectory(vma) 
endif() +# RenderDoc +if (NOT TARGET RenderDoc::API) + add_library(renderdoc INTERFACE) + target_include_directories(renderdoc SYSTEM INTERFACE ./renderdoc) + add_library(RenderDoc::API ALIAS renderdoc) +endif() + # glslang if (NOT TARGET glslang::glslang) set(SKIP_GLSLANG_INSTALL ON CACHE BOOL "") diff --git a/externals/renderdoc/renderdoc_app.h b/externals/renderdoc/renderdoc_app.h new file mode 100644 index 00000000..c01e0593 --- /dev/null +++ b/externals/renderdoc/renderdoc_app.h @@ -0,0 +1,741 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2019-2024 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html +// + +#if !defined(RENDERDOC_NO_STDINT) +#include <stdint.h> +#endif + +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +#define RENDERDOC_CC __cdecl +#elif defined(__linux__) || defined(__FreeBSD__) +#define RENDERDOC_CC +#elif defined(__APPLE__) +#define RENDERDOC_CC +#else +#error "Unknown platform" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +// Constants not used directly in below API + +// This is a GUID/magic value used for when applications pass a path where shader debug +// information can be found to match up with a stripped shader. +// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue = +// RENDERDOC_ShaderDebugMagicValue_value +#define RENDERDOC_ShaderDebugMagicValue_struct \ + { \ + 0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// as an alternative when you want a byte array (assuming x86 endianness): +#define RENDERDOC_ShaderDebugMagicValue_bytearray \ + { \ + 0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// truncated version when only a uint64_t is available (e.g. 
Vulkan tags): +#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc capture options +// + +typedef enum RENDERDOC_CaptureOption +{ + // Allow the application to enable vsync + // + // Default - enabled + // + // 1 - The application can enable or disable vsync at will + // 0 - vsync is force disabled + eRENDERDOC_Option_AllowVSync = 0, + + // Allow the application to enable fullscreen + // + // Default - enabled + // + // 1 - The application can enable or disable fullscreen at will + // 0 - fullscreen is force disabled + eRENDERDOC_Option_AllowFullscreen = 1, + + // Record API debugging events and messages + // + // Default - disabled + // + // 1 - Enable built-in API debugging features and records the results into + // the capture, which is matched up with events on replay + // 0 - no API debugging is forcibly enabled + eRENDERDOC_Option_APIValidation = 2, + eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum + + // Capture CPU callstacks for API events + // + // Default - disabled + // + // 1 - Enables capturing of callstacks + // 0 - no callstacks are captured + eRENDERDOC_Option_CaptureCallstacks = 3, + + // When capturing CPU callstacks, only capture them from actions. + // This option does nothing without the above option being enabled + // + // Default - disabled + // + // 1 - Only captures callstacks for actions. + // Ignored if CaptureCallstacks is disabled + // 0 - Callstacks, if enabled, are captured for every event. + eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4, + eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4, + + // Specify a delay in seconds to wait for a debugger to attach, after + // creating or injecting into a process, before continuing to allow it to run. 
+ // + // 0 indicates no delay, and the process will run immediately after injection + // + // Default - 0 seconds + // + eRENDERDOC_Option_DelayForDebugger = 5, + + // Verify buffer access. This includes checking the memory returned by a Map() call to + // detect any out-of-bounds modification, as well as initialising buffers with undefined contents + // to a marker value to catch use of uninitialised memory. + // + // NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do + // not do the same kind of interception & checking and undefined contents are really undefined. + // + // Default - disabled + // + // 1 - Verify buffer access + // 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in + // RenderDoc. + eRENDERDOC_Option_VerifyBufferAccess = 6, + + // The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites. + // This option now controls the filling of uninitialised buffers with 0xdddddddd which was + // previously always enabled + eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess, + + // Hooks any system API calls that create child processes, and injects + // RenderDoc into them recursively with the same options. + // + // Default - disabled + // + // 1 - Hooks into spawned child processes + // 0 - Child processes are not hooked by RenderDoc + eRENDERDOC_Option_HookIntoChildren = 7, + + // By default RenderDoc only includes resources in the final capture necessary + // for that frame, this allows you to override that behaviour. + // + // Default - disabled + // + // 1 - all live resources at the time of capture are included in the capture + // and available for inspection + // 0 - only the resources referenced by the captured frame are included + eRENDERDOC_Option_RefAllResources = 8, + + // **NOTE**: As of RenderDoc v1.1 this option has been deprecated. 
Setting or + // getting it will be ignored, to allow compatibility with older versions. + // In v1.1 the option acts as if it's always enabled. + // + // By default RenderDoc skips saving initial states for resources where the + // previous contents don't appear to be used, assuming that writes before + // reads indicate previous contents aren't used. + // + // Default - disabled + // + // 1 - initial contents at the start of each captured frame are saved, even if + // they are later overwritten or cleared before being used. + // 0 - unless a read is detected, initial contents will not be saved and will + // appear as black or empty data. + eRENDERDOC_Option_SaveAllInitials = 9, + + // In APIs that allow for the recording of command lists to be replayed later, + // RenderDoc may choose to not capture command lists before a frame capture is + // triggered, to reduce overheads. This means any command lists recorded once + // and replayed many times will not be available and may cause a failure to + // capture. + // + // NOTE: This is only true for APIs where multithreading is difficult or + // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option + // and always capture all command lists since the API is heavily oriented + // around it and the overheads have been reduced by API design. + // + // 1 - All command lists are captured from the start of the application + // 0 - Command lists are only captured if their recording begins during + // the period when a frame capture is in progress. + eRENDERDOC_Option_CaptureAllCmdLists = 10, + + // Mute API debugging output when the API validation mode option is enabled + // + // Default - enabled + // + // 1 - Mute any API debug messages from being displayed or passed through + // 0 - API debugging is displayed as normal + eRENDERDOC_Option_DebugOutputMute = 11, + + // Option to allow vendor extensions to be used even when they may be + // incompatible with RenderDoc and cause corrupted replays or crashes. 
+ // + // Default - inactive + // + // No values are documented, this option should only be used when absolutely + // necessary as directed by a RenderDoc developer. + eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, +} RENDERDOC_CaptureOption; + +// Sets an option that controls how RenderDoc behaves on capture. +// +// Returns 1 if the option and value are valid +// Returns 0 if either is invalid and the option is unchanged +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val); +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val); + +// Gets the current value of an option as a uint32_t +// +// If the option is invalid, 0xffffffff is returned +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt); + +// Gets the current value of an option as a float +// +// If the option is invalid, -FLT_MAX is returned +typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); + +typedef enum RENDERDOC_InputButton +{ + // '0' - '9' matches ASCII values + eRENDERDOC_Key_0 = 0x30, + eRENDERDOC_Key_1 = 0x31, + eRENDERDOC_Key_2 = 0x32, + eRENDERDOC_Key_3 = 0x33, + eRENDERDOC_Key_4 = 0x34, + eRENDERDOC_Key_5 = 0x35, + eRENDERDOC_Key_6 = 0x36, + eRENDERDOC_Key_7 = 0x37, + eRENDERDOC_Key_8 = 0x38, + eRENDERDOC_Key_9 = 0x39, 
+ + // 'A' - 'Z' matches ASCII values + eRENDERDOC_Key_A = 0x41, + eRENDERDOC_Key_B = 0x42, + eRENDERDOC_Key_C = 0x43, + eRENDERDOC_Key_D = 0x44, + eRENDERDOC_Key_E = 0x45, + eRENDERDOC_Key_F = 0x46, + eRENDERDOC_Key_G = 0x47, + eRENDERDOC_Key_H = 0x48, + eRENDERDOC_Key_I = 0x49, + eRENDERDOC_Key_J = 0x4A, + eRENDERDOC_Key_K = 0x4B, + eRENDERDOC_Key_L = 0x4C, + eRENDERDOC_Key_M = 0x4D, + eRENDERDOC_Key_N = 0x4E, + eRENDERDOC_Key_O = 0x4F, + eRENDERDOC_Key_P = 0x50, + eRENDERDOC_Key_Q = 0x51, + eRENDERDOC_Key_R = 0x52, + eRENDERDOC_Key_S = 0x53, + eRENDERDOC_Key_T = 0x54, + eRENDERDOC_Key_U = 0x55, + eRENDERDOC_Key_V = 0x56, + eRENDERDOC_Key_W = 0x57, + eRENDERDOC_Key_X = 0x58, + eRENDERDOC_Key_Y = 0x59, + eRENDERDOC_Key_Z = 0x5A, + + // leave the rest of the ASCII range free + // in case we want to use it later + eRENDERDOC_Key_NonPrintable = 0x100, + + eRENDERDOC_Key_Divide, + eRENDERDOC_Key_Multiply, + eRENDERDOC_Key_Subtract, + eRENDERDOC_Key_Plus, + + eRENDERDOC_Key_F1, + eRENDERDOC_Key_F2, + eRENDERDOC_Key_F3, + eRENDERDOC_Key_F4, + eRENDERDOC_Key_F5, + eRENDERDOC_Key_F6, + eRENDERDOC_Key_F7, + eRENDERDOC_Key_F8, + eRENDERDOC_Key_F9, + eRENDERDOC_Key_F10, + eRENDERDOC_Key_F11, + eRENDERDOC_Key_F12, + + eRENDERDOC_Key_Home, + eRENDERDOC_Key_End, + eRENDERDOC_Key_Insert, + eRENDERDOC_Key_Delete, + eRENDERDOC_Key_PageUp, + eRENDERDOC_Key_PageDn, + + eRENDERDOC_Key_Backspace, + eRENDERDOC_Key_Tab, + eRENDERDOC_Key_PrtScrn, + eRENDERDOC_Key_Pause, + + eRENDERDOC_Key_Max, +} RENDERDOC_InputButton; + +// Sets which key or keys can be used to toggle focus between multiple windows +// +// If keys is NULL or num is 0, toggle keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num); + +// Sets which key or keys can be used to capture the next frame +// +// If keys is NULL or num is 0, captures keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); + 
+typedef enum RENDERDOC_OverlayBits +{ + // This single bit controls whether the overlay is enabled or disabled globally + eRENDERDOC_Overlay_Enabled = 0x1, + + // Show the average framerate over several seconds as well as min/max + eRENDERDOC_Overlay_FrameRate = 0x2, + + // Show the current frame number + eRENDERDOC_Overlay_FrameNumber = 0x4, + + // Show a list of recent captures, and how many captures have been made + eRENDERDOC_Overlay_CaptureList = 0x8, + + // Default values for the overlay mask + eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate | + eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList), + + // Enable all bits + eRENDERDOC_Overlay_All = ~0U, + + // Disable all bits + eRENDERDOC_Overlay_None = 0, +} RENDERDOC_OverlayBits; + +// returns the overlay bits that have been set +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(); +// sets the overlay bits with an and & or mask +typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or); + +// this function will attempt to remove RenderDoc's hooks in the application. +// +// Note: that this can only work correctly if done immediately after +// the module is loaded, before any API work happens. RenderDoc will remove its +// injected hooks and shut down. Behaviour is undefined if this is called +// after any API functions have been called, and there is still no guarantee of +// success. +typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(); + +// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers. +typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown; + +// This function will unload RenderDoc's crash handler. +// +// If you use your own crash handler and don't want RenderDoc's handler to +// intercede, you can call this function to unload it and any unhandled +// exceptions will pass to the next handler. 
+typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(); + +// Sets the capture file path template +// +// pathtemplate is a UTF-8 string that gives a template for how captures will be named +// and where they will be saved. +// +// Any extension is stripped off the path, and captures are saved in the directory +// specified, and named with the filename and the frame number appended. If the +// directory does not exist it will be created, including any parent directories. +// +// If pathtemplate is NULL, the template will remain unchanged +// +// Example: +// +// SetCaptureFilePathTemplate("my_captures/example"); +// +// Capture #1 -> my_captures/example_frame123.rdc +// Capture #2 -> my_captures/example_frame456.rdc +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate); + +// returns the current capture path template, see SetCaptureFileTemplate above, as a UTF-8 string +typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers. +typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate; +typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate; + +// returns the number of captures that have been made +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(); + +// This function returns the details of a capture, by index. New captures are added +// to the end of the list. +// +// filename will be filled with the absolute path to the capture file, as a UTF-8 string +// pathlength will be written with the length in bytes of the filename string +// timestamp will be written with the time of the capture, in seconds since the Unix epoch +// +// Any of the parameters can be NULL and they'll be skipped. 
+// +// The function will return 1 if the capture index is valid, or 0 if the index is invalid +// If the index is invalid, the values will be unchanged +// +// Note: when captures are deleted in the UI they will remain in this list, so the +// capture path may not exist anymore. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename, + uint32_t *pathlength, uint64_t *timestamp); + +// Sets the comments associated with a capture file. These comments are displayed in the +// UI program when opening. +// +// filePath should be a path to the capture file to add comments to. If set to NULL or "" +// the most recent capture file created will be used instead. +// comments should be a NULL-terminated UTF-8 string to add as comments. +// +// Any existing comments will be overwritten. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath, + const char *comments); + +// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers. +// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for +// backwards compatibility with old code, it is castable either way since it's ABI compatible +// as the same function pointer type. +typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected; + +// This function will launch the Replay UI associated with the RenderDoc library injected +// into the running application. +// +// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter +// to connect to this application +// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open +// if cmdline is NULL, the command line will be empty. +// +// returns the PID of the replay UI if successful, 0 if not successful. 
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl, + const char *cmdline); + +// RenderDoc can return a higher version than requested if it's backwards compatible, +// this function returns the actual version returned. If a parameter is NULL, it will be +// ignored and the others will be filled out. +typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); + +// Requests that the replay UI show itself (if hidden or not the current top window). This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + +////////////////////////////////////////////////////////////////////////// +// Capturing functions +// + +// A device pointer is a pointer to the API's root handle. +// +// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc +typedef void *RENDERDOC_DevicePointer; + +// A window handle is the OS's native window handle +// +// This would be an HWND, GLXDrawable, etc +typedef void *RENDERDOC_WindowHandle; + +// A helper macro for Vulkan, where the device handle cannot be used directly. +// +// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use. +// +// Specifically, the value needed is the dispatch table pointer, which sits as the first +// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and +// indirect once. 
+#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst))) + +// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will +// respond to keypresses. Neither parameter can be NULL +typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// capture the next frame on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(); + +// capture the next N frames on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames); + +// When choosing either a device pointer or a window handle to capture, you can pass NULL. +// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify +// any API rendering to a specific window, or a specific API instance rendering to any window, +// or in the simplest case of one window and one API, you can just pass NULL for both. +// +// In either case, if there are two or more possible matching (device,window) pairs it +// is undefined which one will be captured. +// +// Note: for headless rendering you can pass NULL for the window handle and either specify +// a device pointer or leave it NULL as above. + +// Immediately starts capturing API calls on the specified device pointer and window handle. +// +// If there is no matching thing to capture (e.g. no supported API has been initialised), +// this will do nothing. +// +// The results are undefined (including crashes) if two captures are started overlapping, +// even on separate devices and/or windows. +typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Returns whether or not a frame capture is currently ongoing anywhere. 
+// +// This will return 1 if a capture is ongoing, and 0 if there is no capture running +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(); + +// Ends capturing immediately. +// +// This will return 1 if the capture succeeded, and 0 if there was an error capturing. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Ends capturing immediately and discard any data stored without saving to disk. +// +// This will return 1 if the capture was discarded, and 0 if there was an error or no capture +// was in progress +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. +// +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API versions +// + +// RenderDoc uses semantic versioning (http://semver.org/). +// +// MAJOR version is incremented when incompatible API changes happen. +// MINOR version is incremented when functionality is added in a backwards-compatible manner. +// PATCH version is incremented when backwards-compatible bug fixes happen. +// +// Note that this means the API returned can be higher than the one you might have requested. +// e.g. 
if you are running against a newer RenderDoc that supports 1.0.1, it will be returned +// instead of 1.0.0. You can check this with the GetAPIVersion entry point +typedef enum RENDERDOC_Version +{ + eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 + eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 + eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 + eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00 + eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01 + eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02 + eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00 + eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00 + eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00 + eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 + eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 + eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 +} RENDERDOC_Version; + +// API version changelog: +// +// 1.0.0 - initial release +// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered +// by keypress or TriggerCapture, instead of Start/EndFrameCapture. +// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation +// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new +// function pointer is added to the end of the struct, the original layout is identical +// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote +// replay/remote server concept in replay UI) +// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these +// are captures and not debug logging files. 
This is the first API version in the v1.0 +// branch. +// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be +// displayed in the UI program on load. +// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions +// which allows users to opt-in to allowing unsupported vendor extensions to function. +// Should be used at the user's own risk. +// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to +// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to +// 0xdddddddd of uninitialised buffer contents. +// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop +// capturing without saving anything to disk. +// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening +// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. +// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() + +typedef struct RENDERDOC_API_1_6_0 +{ + pRENDERDOC_GetAPIVersion GetAPIVersion; + + pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32; + pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32; + + pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32; + pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32; + + pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys; + pRENDERDOC_SetCaptureKeys SetCaptureKeys; + + pRENDERDOC_GetOverlayBits GetOverlayBits; + pRENDERDOC_MaskOverlayBits MaskOverlayBits; + + // Shutdown was renamed to RemoveHooks in 1.4.1. 
+ // These unions allow old code to continue compiling without changes + union + { + pRENDERDOC_Shutdown Shutdown; + pRENDERDOC_RemoveHooks RemoveHooks; + }; + pRENDERDOC_UnloadCrashHandler UnloadCrashHandler; + + // Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2. + // These unions allow old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate; + // current name + pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate; + }; + union + { + // deprecated name + pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate; + // current name + pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate; + }; + + pRENDERDOC_GetNumCaptures GetNumCaptures; + pRENDERDOC_GetCapture GetCapture; + + pRENDERDOC_TriggerCapture TriggerCapture; + + // IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1. + // This union allows old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected; + // current name + pRENDERDOC_IsTargetControlConnected IsTargetControlConnected; + }; + pRENDERDOC_LaunchReplayUI LaunchReplayUI; + + pRENDERDOC_SetActiveWindow SetActiveWindow; + + pRENDERDOC_StartFrameCapture StartFrameCapture; + pRENDERDOC_IsFrameCapturing IsFrameCapturing; + pRENDERDOC_EndFrameCapture EndFrameCapture; + + // new function in 1.1.0 + pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture; + + // new function in 1.2.0 + pRENDERDOC_SetCaptureFileComments SetCaptureFileComments; + + // new function in 1.4.0 + pRENDERDOC_DiscardFrameCapture DiscardFrameCapture; + + // new function in 1.5.0 + pRENDERDOC_ShowReplayUI ShowReplayUI; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef 
RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API entry point +// +// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available. +// +// The name is the same as the typedef - "RENDERDOC_GetAPI" +// +// This function is not thread safe, and should not be called on multiple threads at once. +// Ideally, call this once as early as possible in your application's startup, before doing +// any API work, since some configuration functionality etc has to be done also before +// initialising any APIs. +// +// Parameters: +// version is a single value from the RENDERDOC_Version above. +// +// outAPIPointers will be filled out with a pointer to the corresponding struct of function +// pointers. +// +// Returns: +// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested +// 0 - if the requested version is not supported or the arguments are invalid. 
+// +typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/src/common/config.cpp b/src/common/config.cpp index 218575ff..57f40b21 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -22,8 +22,10 @@ static bool isShowSplash = false; static bool isNullGpu = false; static bool shouldDumpShaders = false; static bool shouldDumpPM4 = false; +static u32 vblankDivider = 1; static bool vkValidation = false; static bool vkValidationSync = false; +static bool rdocEnable = false; // Gui std::string settings_install_dir = ""; u32 main_window_geometry_x = 400; @@ -94,6 +96,14 @@ bool dumpPM4() { return shouldDumpPM4; } +bool isRdocEnabled() { + return rdocEnable; +} + +u32 vblankDiv() { + return vblankDivider; +} + bool vkValidationEnabled() { return vkValidation; } @@ -233,10 +243,10 @@ void load(const std::filesystem::path& path) { screenWidth = toml::find_or(gpu, "screenWidth", screenWidth); screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); - gpuId = toml::find_or(gpu, "gpuId", 0); isNullGpu = toml::find_or(gpu, "nullGpu", false); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false); + vblankDivider = toml::find_or(gpu, "vblankDivider", 1); } } if (data.contains("Vulkan")) { @@ -244,8 +254,10 @@ void load(const std::filesystem::path& path) { if (vkResult.is_ok()) { auto vk = vkResult.unwrap(); + gpuId = toml::find_or(vk, "gpuId", 0); vkValidation = toml::find_or(vk, "validation", true); vkValidationSync = toml::find_or(vk, "validation_sync", true); + rdocEnable = toml::find_or(vk, "rdocEnable", false); } } if (data.contains("Debug")) { @@ -312,14 +324,16 @@ void save(const std::filesystem::path& path) { data["General"]["logFilter"] = logFilter; data["General"]["logType"] = logType; data["General"]["showSplash"] = isShowSplash; - data["GPU"]["gpuId"] = gpuId; data["GPU"]["screenWidth"] = 
screenWidth; data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["dumpPM4"] = shouldDumpPM4; + data["GPU"]["vblankDivider"] = vblankDivider; + data["Vulkan"]["gpuId"] = gpuId; data["Vulkan"]["validation"] = vkValidation; data["Vulkan"]["validation_sync"] = vkValidationSync; + data["Vulkan"]["rdocEnable"] = rdocEnable; data["Debug"]["DebugDump"] = isDebugDump; data["LLE"]["libc"] = isLibc; data["GUI"]["theme"] = mw_themes; diff --git a/src/common/config.h b/src/common/config.h index 0a3b4905..637ac746 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -26,6 +26,8 @@ bool showSplash(); bool nullGpu(); bool dumpShaders(); bool dumpPM4(); +bool isRdocEnabled(); +u32 vblankDiv(); bool vkValidationEnabled(); bool vkValidationSyncEnabled(); diff --git a/src/common/path_util.cpp b/src/common/path_util.cpp index f0f56b85..c1e8a5c0 100644 --- a/src/common/path_util.cpp +++ b/src/common/path_util.cpp @@ -73,6 +73,7 @@ static auto UserPaths = [] { create_path(PathType::TempDataDir, user_dir / TEMPDATA_DIR); create_path(PathType::SysModuleDir, user_dir / SYSMODULES_DIR); create_path(PathType::DownloadDir, user_dir / DOWNLOAD_DIR); + create_path(PathType::CapturesDir, user_dir / CAPTURES_DIR); return paths; }(); diff --git a/src/common/path_util.h b/src/common/path_util.h index 67688f89..263edd46 100644 --- a/src/common/path_util.h +++ b/src/common/path_util.h @@ -19,6 +19,7 @@ enum class PathType { GameDataDir, // Where game data is stored. SysModuleDir, // Where system modules are stored. DownloadDir, // Where downloads/temp files are stored. + CapturesDir, // Where rdoc captures are stored. 
}; constexpr auto PORTABLE_DIR = "user"; @@ -33,6 +34,7 @@ constexpr auto GAMEDATA_DIR = "data"; constexpr auto TEMPDATA_DIR = "temp"; constexpr auto SYSMODULES_DIR = "sys_modules"; constexpr auto DOWNLOAD_DIR = "download"; +constexpr auto CAPTURES_DIR = "captures"; // Filenames constexpr auto LOG_FILE = "shad_log.txt"; diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 866a9698..dba69d6e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -20,13 +20,12 @@ extern Frontend::WindowSDL* g_window; std::unique_ptr renderer; +std::unique_ptr liverpool; namespace Libraries::GnmDriver { using namespace AmdGpu; -static std::unique_ptr liverpool; - enum GnmEventIdents : u64 { Compute0RelMem = 0x00, Compute1RelMem = 0x01, @@ -2131,6 +2130,7 @@ int PS4_SYSV_ABI sceGnmSubmitDone() { if (!liverpool->IsGpuIdle()) { submission_lock = true; } + liverpool->SubmitDone(); send_init_packet = true; ++frames_submitted; return ORBIS_OK; diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 6bd88459..3555fddc 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -78,9 +78,7 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) { std::scoped_lock lock{m_mutex}; for (auto& event : m_events) { - ASSERT_MSG(event.event.filter == filter, - "Event to trigger doesn't match to queue events"); - if (event.event.ident == ident) { + if ((event.event.ident == ident) && (event.event.filter == filter)) { event.Trigger(trigger_data); has_found = true; } diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index f9325297..47073b2c 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -43,8 +43,8 @@ namespace Libraries { void InitHLELibs(Core::Loader::SymbolsResolver* sym) { LOG_INFO(Lib_Kernel, "Initializing HLE libraries"); 
Libraries::Kernel::LibKernel_Register(sym); - Libraries::VideoOut::RegisterLib(sym); Libraries::GnmDriver::RegisterlibSceGnmDriver(sym); + Libraries::VideoOut::RegisterLib(sym); if (!Config::isLleLibc()) { Libraries::LibC::libcSymbolsRegister(sym); } diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index e74fb10f..97b1816e 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -3,14 +3,16 @@ #include #include "common/assert.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/thread.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" -#include "core/platform.h" - #include "video_core/renderer_vulkan/renderer_vulkan.h" extern std::unique_ptr renderer; +extern std::unique_ptr liverpool; namespace Libraries::VideoOut { @@ -41,20 +43,18 @@ VideoOutDriver::VideoOutDriver(u32 width, u32 height) { main_port.resolution.fullHeight = height; main_port.resolution.paneWidth = width; main_port.resolution.paneHeight = height; + present_thread = std::jthread([&](std::stop_token token) { PresentThread(token); }); } VideoOutDriver::~VideoOutDriver() = default; int VideoOutDriver::Open(const ServiceThreadParams* params) { - std::scoped_lock lock{mutex}; - if (main_port.is_open) { return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY; } - - int handle = 1; main_port.is_open = true; - return handle; + liverpool->SetVoPort(&main_port); + return 1; } void VideoOutDriver::Close(s32 handle) { @@ -158,31 +158,22 @@ int VideoOutDriver::UnregisterBuffers(VideoOutPort* port, s32 attributeIndex) { return ORBIS_OK; } -void VideoOutDriver::Flip(std::chrono::microseconds timeout) { - Request req; - { - std::unique_lock lock{mutex}; - submit_cond.wait_for(lock, timeout, [&] { return !requests.empty(); }); - if (requests.empty()) { - renderer->ShowSplash(); - return; - } - - // Retrieve the request. 
- req = requests.front(); - requests.pop(); +std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { + if (!req) { + return std::chrono::microseconds{0}; } + const auto start = std::chrono::high_resolution_clock::now(); + // Whatever the game is rendering show splash if it is active if (!renderer->ShowSplash(req.frame)) { // Present the frame. renderer->Present(req.frame); } - std::scoped_lock lock{mutex}; - // Update flip status. - auto& flip_status = req.port->flip_status; + auto* port = req.port; + auto& flip_status = port->flip_status; flip_status.count++; flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); @@ -192,7 +183,7 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { flip_status.flipPendingNum = static_cast(requests.size()); // Trigger flip events for the port. - for (auto& event : req.port->flip_events) { + for (auto& event : port->flip_events) { if (event != nullptr) { event->TriggerEvent(SCE_VIDEO_OUT_EVENT_FLIP, Kernel::SceKernelEvent::Filter::VideoOut, reinterpret_cast(req.flip_arg)); @@ -201,21 +192,23 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { // Reset flip label if (req.index != -1) { - req.port->buffer_labels[req.index] = 0; + port->buffer_labels[req.index] = 0; + port->SignalVoLabel(); } + + const auto end = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(end - start); } bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop /*= false*/) { - std::scoped_lock lock{mutex}; - Vulkan::Frame* frame; if (index == -1) { frame = renderer->PrepareBlankFrame(); } else { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; - frame = renderer->PrepareFrame(group, buffer.address_left); + frame = renderer->PrepareFrame(group, buffer.address_left, is_eop); } if (index != -1 && requests.size() >= 
port->NumRegisteredBuffers()) { @@ -223,6 +216,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, return false; } + std::scoped_lock lock{mutex}; requests.push({ .frame = frame, .port = port, @@ -234,24 +228,53 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, port->flip_status.flipPendingNum = static_cast(requests.size()); port->flip_status.gcQueueNum = 0; - submit_cond.notify_one(); return true; } -void VideoOutDriver::Vblank() { - std::scoped_lock lock{mutex}; +void VideoOutDriver::PresentThread(std::stop_token token) { + static constexpr std::chrono::milliseconds VblankPeriod{16}; + Common::SetCurrentThreadName("PresentThread"); - auto& vblank_status = main_port.vblank_status; - vblank_status.count++; - vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); - vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + const auto receive_request = [this] -> Request { + std::scoped_lock lk{mutex}; + if (!requests.empty()) { + const auto request = requests.front(); + requests.pop(); + return request; + } + return {}; + }; - // Trigger flip events for the port. - for (auto& event : main_port.vblank_events) { - if (event != nullptr) { - event->TriggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK, - Kernel::SceKernelEvent::Filter::VideoOut, nullptr); + auto vblank_period = VblankPeriod / Config::vblankDiv(); + auto delay = std::chrono::microseconds{0}; + while (!token.stop_requested()) { + // Sleep for most of the vblank duration. + std::this_thread::sleep_for(vblank_period - delay); + + // Check if it's time to take a request. 
+ auto& vblank_status = main_port.vblank_status; + if (vblank_status.count % (main_port.flip_rate + 1) == 0) { + const auto request = receive_request(); + delay = Flip(request); + FRAME_END; + } + + { + // Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus` + std::unique_lock lock{main_port.vo_mutex}; + vblank_status.count++; + vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); + vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + main_port.vblank_cv.notify_all(); + } + + // Trigger flip events for the port. + for (auto& event : main_port.vblank_events) { + if (event != nullptr) { + event->TriggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK, + Kernel::SceKernelEvent::Filter::VideoOut, nullptr); + } } } } diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index d98e62ee..104056de 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -3,10 +3,13 @@ #pragma once +#include "common/debug.h" +#include "common/polyfill_thread.h" +#include "core/libraries/videoout/video_out.h" + #include #include #include -#include "core/libraries/videoout/video_out.h" namespace Vulkan { struct Frame; @@ -25,6 +28,9 @@ struct VideoOutPort { SceVideoOutVblankStatus vblank_status; std::vector flip_events; std::vector vblank_events; + std::mutex vo_mutex; + std::condition_variable vo_cv; + std::condition_variable vblank_cv; int flip_rate = 0; s32 FindFreeGroup() const { @@ -35,6 +41,22 @@ struct VideoOutPort { return index; } + bool IsVoLabel(const u64* address) const { + const u64* start = &buffer_labels[0]; + const u64* end = &buffer_labels[MaxDisplayBuffers - 1]; + return address >= start && address <= end; + } + + void WaitVoLabel(auto&& pred) { + std::unique_lock lk{vo_mutex}; + vo_cv.wait(lk, pred); + } + + void SignalVoLabel() { + std::scoped_lock lk{vo_mutex}; + vo_cv.notify_one(); + } + [[nodiscard]] int NumRegisteredBuffers() const { return 
std::count_if(buffer_slots.cbegin(), buffer_slots.cend(), [](auto& buffer) { return buffer.group_index != -1; }); @@ -63,11 +85,8 @@ public: const BufferAttribute* attribute); int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex); - void Flip(std::chrono::microseconds timeout); bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); - void Vblank(); - private: struct Request { Vulkan::Frame* frame; @@ -76,14 +95,19 @@ private: s64 flip_arg; u64 submit_tsc; bool eop; + + operator bool() const noexcept { + return frame != nullptr; + } }; + std::chrono::microseconds Flip(const Request& req); + void PresentThread(std::stop_token token); + std::mutex mutex; VideoOutPort main_port{}; - std::condition_variable_any submit_cond; - std::condition_variable done_cond; + std::jthread present_thread; std::queue requests; - bool is_neo{}; }; } // namespace Libraries::VideoOut diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 8fbd69c4..15e14662 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -183,6 +183,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus* return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; } + std::unique_lock lock{port->vo_mutex}; *status = port->vblank_status; return ORBIS_OK; } @@ -229,14 +230,6 @@ s32 PS4_SYSV_ABI sceVideoOutUnregisterBuffers(s32 handle, s32 attributeIndex) { return driver->UnregisterBuffers(port, attributeIndex); } -void Flip(std::chrono::microseconds micros) { - return driver->Flip(micros); -} - -void Vblank() { - return driver->Vblank(); -} - void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) { auto* port = driver->GetPort(handle); ASSERT(port); @@ -266,6 +259,18 @@ s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo( return ORBIS_OK; } +s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle) { + auto* port = driver->GetPort(handle); + if (!port) { + 
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; + } + + std::unique_lock lock{port->vo_mutex}; + const auto prev_counter = port->vblank_status.count; + port->vblank_cv.wait(lock, [&]() { return prev_counter != port->vblank_status.count; }); + return ORBIS_OK; +} + void RegisterLib(Core::Loader::SymbolsResolver* sym) { driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); @@ -294,6 +299,7 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) { sceVideoOutGetVblankStatus); LIB_FUNCTION("kGVLc3htQE8", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutGetDeviceCapabilityInfo); + LIB_FUNCTION("j6RaAUlaLv0", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutWaitVblank); // openOrbis appears to have libSceVideoOut_v1 module libSceVideoOut_v1.1 LIB_FUNCTION("Up36PTk687E", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutOpen); diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 52426ecc..b4423efd 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -92,11 +92,12 @@ void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, Pixe u32 tilingMode, u32 aspectRatio, u32 width, u32 height, u32 pitchInPixel); s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); -s32 PS4_SYSV_ABI sceVideoOutAddVBlankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); +s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata); s32 PS4_SYSV_ABI sceVideoOutRegisterBuffers(s32 handle, s32 startIndex, void* const* addresses, s32 bufferNum, const BufferAttribute* attribute); s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate); s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle); +s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle); s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode, s64 flipArg); s32 PS4_SYSV_ABI 
sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status); s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status); @@ -104,9 +105,6 @@ s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 i const void* param); s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); -void Flip(std::chrono::microseconds micros); -void Vblank(); - // Internal system functions void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk); diff --git a/src/emulator.cpp b/src/emulator.cpp index 0b542e68..5b162e05 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include #include #include @@ -10,21 +9,30 @@ #include #include #include -#include #include "common/config.h" #include "common/debug.h" #include "common/logging/backend.h" +#include "common/logging/log.h" #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" #include "common/singleton.h" #include "common/version.h" +#include "core/file_format/psf.h" +#include "core/file_format/splash.h" #include "core/file_sys/fs.h" +#include "core/libraries/disc_map/disc_map.h" #include "core/libraries/kernel/thread_management.h" +#include "core/libraries/libc/libc.h" +#include "core/libraries/libc_internal/libc_internal.h" #include "core/libraries/libs.h" +#include "core/libraries/rtc/rtc.h" #include "core/linker.h" #include "core/memory.h" #include "emulator.h" +#include "video_core/renderdoc.h" + +#include Frontend::WindowSDL* g_window = nullptr; @@ -52,6 +60,9 @@ Emulator::Emulator() { memory = Core::Memory::Instance(); controller = Common::Singleton::Instance(); linker = Common::Singleton::Instance(); + + // Load renderdoc module. 
+ VideoCore::LoadRenderDoc(); } Emulator::~Emulator() { @@ -120,6 +131,12 @@ void Emulator::Run(const std::filesystem::path& file) { } mnt->Mount(mount_download_dir, "/download0"); + const auto& mount_captures_dir = Common::FS::GetUserPath(Common::FS::PathType::CapturesDir); + if (!std::filesystem::exists(mount_captures_dir)) { + std::filesystem::create_directory(mount_captures_dir); + } + VideoCore::SetOutputDir(mount_captures_dir.generic_string(), id); + // Initialize kernel and library facilities. Libraries::Kernel::init_pthreads(); Libraries::InitHLELibs(&linker->GetHLESymbols()); @@ -152,14 +169,8 @@ void Emulator::Run(const std::filesystem::path& file) { std::jthread mainthread = std::jthread([this](std::stop_token stop_token) { linker->Execute(); }); - // Begin main window loop until the application exits - static constexpr std::chrono::milliseconds FlipPeriod{16}; - while (window->isOpen()) { window->waitEvent(); - Libraries::VideoOut::Flip(FlipPeriod); - Libraries::VideoOut::Vblank(); - FRAME_END; } std::exit(0); diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 4570b64e..2da24610 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -11,6 +11,7 @@ #include "core/libraries/pad/pad.h" #include "input/controller.h" #include "sdl_window.h" +#include "video_core/renderdoc.h" #ifdef __APPLE__ #include @@ -72,7 +73,7 @@ void WindowSDL::waitEvent() { // Called on main thread SDL_Event event; - if (!SDL_PollEvent(&event)) { + if (!SDL_WaitEvent(&event)) { return; } @@ -180,6 +181,11 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { ax = Input::GetAxis(-0x80, 0x80, axisvalue); break; case SDLK_S: + if (event->key.mod == SDL_KMOD_LCTRL) { + // Trigger rdoc capture + VideoCore::TriggerCapture(); + break; + } axis = Input::Axis::LeftY; if (event->type == SDL_EVENT_KEY_DOWN) { axisvalue += 127; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index ab7ad241..df7eec82 100644 --- 
a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -5,8 +5,10 @@ #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" +#include "core/libraries/videoout/driver.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" +#include "video_core/renderdoc.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" namespace AmdGpu { @@ -32,12 +34,15 @@ void Liverpool::Process(std::stop_token stoken) { while (!stoken.stop_requested()) { { std::unique_lock lk{submit_mutex}; - Common::CondvarWait(submit_cv, lk, stoken, [this] { return num_submits != 0; }); + Common::CondvarWait(submit_cv, lk, stoken, + [this] { return num_submits != 0 || submit_done; }); } if (stoken.stop_requested()) { break; } + VideoCore::StartCapture(); + int qid = -1; while (num_submits) { @@ -48,11 +53,9 @@ void Liverpool::Process(std::stop_token stoken) { Task::Handle task{}; { std::scoped_lock lock{queue.m_access}; - if (queue.submits.empty()) { continue; } - task = queue.submits.front(); } task.resume(); @@ -64,9 +67,20 @@ void Liverpool::Process(std::stop_token stoken) { queue.submits.pop(); --num_submits; + std::scoped_lock lock2{submit_mutex}; + submit_cv.notify_all(); } } + if (submit_done) { + VideoCore::EndCapture(); + + if (rasterizer) { + rasterizer->Flush(); + } + submit_done = false; + } + Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle); } } @@ -365,8 +379,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); const u32 data_size = (header->type3.count.Value() - 2) * 4; + u64* address = write_data->Address(); if (!write_data->wr_one_addr.Value()) { - std::memcpy(write_data->Address(), write_data->data, data_size); + std::memcpy(address, write_data->data, data_size); } else { UNREACHABLE(); } @@ -379,6 +394,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, 
std::span(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + // Optimization: VO label waits are special because the emulator + // will write to the label when presentation is finished. So if + // there are no other submits to yield to we can sleep the thread + // instead and allow other tasks to run. + const u64* wait_addr = wait_reg_mem->Address(); + if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) { + vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); + } while (!wait_reg_mem->Test()) { TracyFiberLeave; co_yield {}; @@ -511,7 +534,7 @@ void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { auto task = ProcessGraphics(dcb, ccb); { - std::unique_lock lock{queue.m_access}; + std::scoped_lock lock{queue.m_access}; queue.submits.emplace(task.handle); } @@ -526,7 +549,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span acb) { const auto& task = ProcessCompute(acb, vqid); { - std::unique_lock lock{queue.m_access}; + std::scoped_lock lock{queue.m_access}; queue.submits.emplace(task.handle); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index b87c80ed..8553bc92 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,10 @@ namespace Vulkan { class Rasterizer; } +namespace Libraries::VideoOut { +struct VideoOutPort; +} + namespace AmdGpu { #define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32)) @@ -991,10 +996,25 @@ public: void SubmitGfx(std::span dcb, std::span ccb); void SubmitAsc(u32 vqid, std::span acb); + void SubmitDone() noexcept { + std::scoped_lock lk{submit_mutex}; + submit_done = true; + submit_cv.notify_one(); + } + + void WaitGpuIdle() noexcept { + std::unique_lock lk{submit_mutex}; + submit_cv.wait(lk, [this] { return num_submits == 0; }); + } + bool IsGpuIdle() const { return num_submits == 0; } + void 
SetVoPort(Libraries::VideoOut::VideoOutPort* port) { + vo_port = port; + } + void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; } @@ -1059,8 +1079,10 @@ private: } cblock{}; Vulkan::Rasterizer* rasterizer{}; + Libraries::VideoOut::VideoOutPort* vo_port{}; std::jthread process_thread{}; std::atomic num_submits{}; + std::atomic submit_done{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; }; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index eded2de3..9b44da02 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -404,8 +404,9 @@ struct PM4CmdWaitRegMem { u32 mask; u32 poll_interval; - u32* Address() const { - return reinterpret_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); + template + T Address() const { + return reinterpret_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); } bool Test() const { @@ -464,8 +465,8 @@ struct PM4CmdWriteData { } template - T* Address() const { - return reinterpret_cast(addr64); + T Address() const { + return reinterpret_cast(addr64); } }; @@ -494,8 +495,9 @@ struct PM4CmdEventWriteEos { BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS }; - u32* Address() const { - return reinterpret_cast(address_lo | u64(address_hi) << 32); + template + T Address() const { + return reinterpret_cast(address_lo | u64(address_hi) << 32); } u32 DataDWord() const { diff --git a/src/video_core/renderdoc.cpp b/src/video_core/renderdoc.cpp new file mode 100644 index 00000000..7f88e126 --- /dev/null +++ b/src/video_core/renderdoc.cpp @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/assert.h" +#include "common/config.h" +#include "video_core/renderdoc.h" + +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include + +namespace VideoCore { + +enum class CaptureState { + Idle, + Triggered, + InProgress, 
+}; +static CaptureState capture_state{CaptureState::Idle}; + +RENDERDOC_API_1_6_0* rdoc_api{}; + +void LoadRenderDoc() { +#ifdef WIN32 + + // Check if we are running by RDoc GUI + HMODULE mod = GetModuleHandleA("renderdoc.dll"); + if (!mod && Config::isRdocEnabled()) { + // If enabled in config, try to load RDoc runtime in offline mode + HKEY h_reg_key; + LONG result = RegOpenKeyExW(HKEY_LOCAL_MACHINE, + L"SOFTWARE\\Classes\\RenderDoc.RDCCapture.1\\DefaultIcon\\", 0, + KEY_READ, &h_reg_key); + if (result != ERROR_SUCCESS) { + return; + } + std::array key_str{}; + DWORD str_sz_out{key_str.size()}; + result = RegQueryValueExW(h_reg_key, L"", 0, NULL, (LPBYTE)key_str.data(), &str_sz_out); + if (result != ERROR_SUCCESS) { + return; + } + + std::filesystem::path path{key_str.cbegin(), key_str.cend()}; + path = path.parent_path().append("renderdoc.dll"); + const auto path_to_lib = path.generic_string(); + mod = LoadLibraryA(path_to_lib.c_str()); + } + + if (mod) { + const auto RENDERDOC_GetAPI = + reinterpret_cast(GetProcAddress(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } +#else +#ifdef ANDROID + static constexpr const char RENDERDOC_LIB[] = "libVkLayer_GLES_RenderDoc.so"; +#else + static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so"; +#endif + if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) { + const auto RENDERDOC_GetAPI = + reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } +#endif + if (rdoc_api) { + // Disable default capture keys as they suppose to trigger present-to-present capturing + // and it is not what we want + rdoc_api->SetCaptureKeys(nullptr, 0); + + // Also remove rdoc crash handler + rdoc_api->UnloadCrashHandler(); + } +} + +void StartCapture() { + if (!rdoc_api) { + return; + } + + if (capture_state == CaptureState::Triggered) { + 
rdoc_api->StartFrameCapture(nullptr, nullptr); + capture_state = CaptureState::InProgress; + } +} + +void EndCapture() { + if (!rdoc_api) { + return; + } + + if (capture_state == CaptureState::InProgress) { + rdoc_api->EndFrameCapture(nullptr, nullptr); + capture_state = CaptureState::Idle; + } +} + +void TriggerCapture() { + if (capture_state == CaptureState::Idle) { + capture_state = CaptureState::Triggered; + } +} + +void SetOutputDir(const std::string& path, const std::string& prefix) { + if (!rdoc_api) { + return; + } + rdoc_api->SetCaptureFilePathTemplate((path + '\\' + prefix).c_str()); +} + +} // namespace VideoCore diff --git a/src/video_core/renderdoc.h b/src/video_core/renderdoc.h new file mode 100644 index 00000000..febf6fbc --- /dev/null +++ b/src/video_core/renderdoc.h @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <string> + +namespace VideoCore { + +/// Loads renderdoc dynamic library module. +void LoadRenderDoc(); + +/// Begins a capture if a renderdoc instance is attached. +void StartCapture(); + +/// Ends current renderdoc capture. +void EndCapture(); + +/// Triggers capturing process.
+void TriggerCapture(); + +/// Sets output directory for captures +void SetOutputDir(const std::string& path, const std::string& prefix); + +} // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 098f14d9..6810bf34 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -63,44 +63,30 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for }; } -RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool) - : window{window_}, instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, - scheduler{instance}, swapchain{instance, window}, texture_cache{instance, scheduler} { - rasterizer = std::make_unique<Rasterizer>(instance, scheduler, texture_cache, liverpool); +RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_) + : window{window_}, liverpool{liverpool_}, + instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, draw_scheduler{instance}, + present_scheduler{instance}, flip_scheduler{instance}, swapchain{instance, window}, + texture_cache{instance, draw_scheduler} { + rasterizer = std::make_unique<Rasterizer>(instance, draw_scheduler, texture_cache, liverpool); const u32 num_images = swapchain.GetImageCount(); const vk::Device device = instance.GetDevice(); - const vk::CommandPoolCreateInfo pool_info = { - .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer | - vk::CommandPoolCreateFlagBits::eTransient, - .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), - }; - command_pool = device.createCommandPoolUnique(pool_info); - - const vk::CommandBufferAllocateInfo alloc_info = { - .commandPool = *command_pool, - .level = vk::CommandBufferLevel::ePrimary, - .commandBufferCount = num_images, - }; - - const auto cmdbuffers = device.allocateCommandBuffers(alloc_info); + // Create presentation frames.
present_frames.resize(num_images); for (u32 i = 0; i < num_images; i++) { Frame& frame = present_frames[i]; - frame.cmdbuf = cmdbuffers[i]; - frame.render_ready = device.createSemaphore({}); frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled}); free_queue.push(&frame); } } RendererVulkan::~RendererVulkan() { - scheduler.Finish(); + draw_scheduler.Finish(); const vk::Device device = instance.GetDevice(); for (auto& frame : present_frames) { vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); device.destroyImageView(frame.image_view); - device.destroySemaphore(frame.render_ready); device.destroyFence(frame.present_done); } } @@ -184,7 +170,7 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { info.pitch = splash->GetImageInfo().width; info.guest_address = VAddr(splash->GetImageData().data()); info.guest_size_bytes = splash->GetImageData().size(); - splash_img.emplace(instance, scheduler, info); + splash_img.emplace(instance, present_scheduler, info); texture_cache.RefreshImage(*splash_img); } frame = PrepareFrameInternal(*splash_img); @@ -193,12 +179,18 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { return true; } -Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { +Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop) { // Request a free presentation frame. Frame* frame = GetRenderFrame(); - // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + // EOP flips are triggered from GPU thread so use the drawing scheduler to record + // commands. Otherwise we are dealing with a CPU flip which could have arrived + // from any guest thread. Use a separate scheduler for that. + auto& scheduler = is_eop ? 
draw_scheduler : flip_scheduler; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf); const std::array pre_barrier{ vk::ImageMemoryBarrier{ @@ -218,12 +210,11 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { }, }, }; - - const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. cmdbuf.blitImage( image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height), @@ -245,13 +236,15 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); - // Flush pending vulkan operations. - scheduler.Flush(frame->render_ready); + // Flush frame creation commands. 
+ frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle(); + frame->ready_tick = scheduler.CurrentTick(); + SubmitInfo info{}; + scheduler.Flush(info); return frame; } @@ -260,11 +253,8 @@ void RendererVulkan::Present(Frame* frame) { const vk::Image swapchain_image = swapchain.Image(); - const vk::CommandBufferBeginInfo begin_info = { - .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, - }; - const vk::CommandBuffer cmdbuf = frame->cmdbuf; - cmdbuf.begin(begin_info); + auto& scheduler = present_scheduler; + const auto cmdbuf = scheduler.CommandBuffer(); { auto* profiler_ctx = instance.GetProfilerContext(); TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame", @@ -339,35 +329,17 @@ void RendererVulkan::Present(Frame* frame) { TracyVkCollect(profiler_ctx, cmdbuf); } } - cmdbuf.end(); - static constexpr std::array wait_stage_masks = { - vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eAllGraphics, - }; - - const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); - const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); - const std::array wait_semaphores = {image_acquired, frame->render_ready}; - - vk::SubmitInfo submit_info = { - .waitSemaphoreCount = static_cast(wait_semaphores.size()), - .pWaitSemaphores = wait_semaphores.data(), - .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1u, - .pCommandBuffers = &cmdbuf, - .signalSemaphoreCount = 1, - .pSignalSemaphores = &present_ready, - }; - - std::scoped_lock submit_lock{scheduler.submit_mutex}; - try { - instance.GetGraphicsQueue().submit(submit_info, frame->present_done); - } catch (vk::DeviceLostError& err) { - LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what()); - UNREACHABLE(); - } + // Flush vulkan commands. 
+ SubmitInfo info{}; + info.AddWait(swapchain.GetImageAcquiredSemaphore()); + info.AddWait(frame->ready_semaphore, frame->ready_tick); + info.AddSignal(swapchain.GetPresentReadySemaphore()); + info.AddSignal(frame->present_done); + scheduler.Flush(info); + // Present to swapchain. + std::scoped_lock submit_lock{Scheduler::submit_mutex}; swapchain.Present(); // Free the frame for reuse diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 701d3d14..3fe9267f 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -26,9 +26,15 @@ struct Frame { VmaAllocation allocation; vk::Image image; vk::ImageView image_view; - vk::Semaphore render_ready; vk::Fence present_done; - vk::CommandBuffer cmdbuf; + vk::Semaphore ready_semaphore; + u64 ready_tick; +}; + +enum SchedulerType { + Draw, + Present, + CpuFlip, }; class Rasterizer; @@ -39,16 +45,16 @@ public: ~RendererVulkan(); Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, - VAddr cpu_address) { + VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(info, cpu_address); auto& image = texture_cache.GetImage(image_id); - return PrepareFrameInternal(image); + return PrepareFrameInternal(image, is_eop); } Frame* PrepareBlankFrame() { auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); - return PrepareFrameInternal(image); + return PrepareFrameInternal(image, true); } VideoCore::Image& RegisterVideoOutSurface( @@ -60,9 +66,9 @@ public: } bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { - return std::find_if(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(), [&](VAddr vo_buffer) { + return std::ranges::find_if(vo_buffers_addr, [&](VAddr vo_buffer) { return vo_buffer == color_buffer.Address(); - }) != vo_buffers_addr.cend(); + }) != 
vo_buffers_addr.end(); } bool ShowSplash(Frame* frame = nullptr); @@ -70,13 +76,16 @@ public: void RecreateFrame(Frame* frame, u32 width, u32 height); private: - Frame* PrepareFrameInternal(VideoCore::Image& image); + Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true); Frame* GetRenderFrame(); private: Frontend::WindowSDL& window; + AmdGpu::Liverpool* liverpool; Instance instance; - Scheduler scheduler; + Scheduler draw_scheduler; + Scheduler present_scheduler; + Scheduler flip_scheduler; Swapchain swapchain; std::unique_ptr rasterizer; VideoCore::TextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 037510d4..753f2bbd 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include -#include "common/assert.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" @@ -60,46 +58,4 @@ void MasterSemaphore::Wait(u64 tick) { Refresh(); } -void MasterSemaphore::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, - u64 signal_value) { - cmdbuf.end(); - - const u32 num_signal_semaphores = signal ? 2U : 1U; - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{Handle(), signal}; - - const u32 num_wait_semaphores = wait ? 
2U : 1U; - const std::array wait_values{signal_value - 1, u64(1)}; - const std::array wait_semaphores{Handle(), wait}; - - static constexpr std::array wait_stage_masks = { - vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eColorAttachmentOutput, - }; - - const vk::TimelineSemaphoreSubmitInfo timeline_si = { - .waitSemaphoreValueCount = num_wait_semaphores, - .pWaitSemaphoreValues = wait_values.data(), - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - - const vk::SubmitInfo submit_info = { - .pNext = &timeline_si, - .waitSemaphoreCount = num_wait_semaphores, - .pWaitSemaphores = wait_semaphores.data(), - .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1u, - .pCommandBuffers = &cmdbuf, - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - - try { - instance.GetGraphicsQueue().submit(submit_info); - } catch (vk::DeviceLostError& err) { - UNREACHABLE_MSG("Device lost during submit: {}", err.what()); - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 963676b1..ebc7a60a 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -46,10 +46,6 @@ public: /// Waits for a tick to be hit on the GPU void Wait(u64 tick); - /// Submits the provided command buffer for execution - void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, - u64 signal_value); - protected: const Instance& instance; vk::UniqueSemaphore semaphore; ///< Timeline semaphore. 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fe52d074..67a88c47 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -96,6 +96,13 @@ void Rasterizer::DispatchDirect() { cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z); } +u64 Rasterizer::Flush() { + const u64 current_tick = scheduler.CurrentTick(); + SubmitInfo info{}; + scheduler.Flush(info); + return current_tick; +} + void Rasterizer::BeginRendering() { const auto& regs = liverpool->regs; RenderState state; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index aead5955..64dc87ef 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -36,6 +36,8 @@ public: void ScopeMarkerBegin(const std::string& str); void ScopeMarkerEnd(); + u64 Flush(); + private: u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset); void MapMemory(VAddr addr, size_t size); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 39dc2847..e7b12d49 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -2,12 +2,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/assert.h" #include "common/debug.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { +std::mutex Scheduler::submit_mutex; + Scheduler::Scheduler(const Instance& instance) : instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} { profiler_scope = reinterpret_cast(std::malloc(sizeof(tracy::VkCtxScope))); @@ -50,22 +53,24 @@ void Scheduler::EndRendering() { current_cmdbuf.endRendering(); } -void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore 
wait) { - // When flushing, we only send data to the worker thread; no waiting is necessary. - SubmitExecution(signal, wait); +void Scheduler::Flush(SubmitInfo& info) { + // When flushing, we only send data to the driver; no waiting is necessary. + SubmitExecution(info); } -void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) { +void Scheduler::Finish() { // When finishing, we need to wait for the submission to have executed on the device. const u64 presubmit_tick = CurrentTick(); - SubmitExecution(signal, wait); + SubmitInfo info{}; + SubmitExecution(info); Wait(presubmit_tick); } void Scheduler::Wait(u64 tick) { if (tick >= master_semaphore.CurrentTick()) { // Make sure we are not waiting for the current tick without signalling - Flush(); + SubmitInfo info{}; + Flush(info); } master_semaphore.Wait(tick); } @@ -86,7 +91,7 @@ void Scheduler::AllocateWorkerCommandBuffers() { } } -void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { +void Scheduler::SubmitExecution(SubmitInfo& info) { std::scoped_lock lk{submit_mutex}; const u64 signal_value = master_semaphore.NextTick(); @@ -97,7 +102,40 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa } EndRendering(); - master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value); + current_cmdbuf.end(); + + const vk::Semaphore timeline = master_semaphore.Handle(); + info.AddSignal(timeline, signal_value); + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::TimelineSemaphoreSubmitInfo timeline_si = { + .waitSemaphoreValueCount = static_cast(info.wait_ticks.size()), + .pWaitSemaphoreValues = info.wait_ticks.data(), + .signalSemaphoreValueCount = static_cast(info.signal_ticks.size()), + .pSignalSemaphoreValues = info.signal_ticks.data(), + }; + + const vk::SubmitInfo submit_info = { + .pNext = 
&timeline_si, + .waitSemaphoreCount = static_cast(info.wait_semas.size()), + .pWaitSemaphores = info.wait_semas.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1U, + .pCommandBuffers = &current_cmdbuf, + .signalSemaphoreCount = static_cast(info.signal_semas.size()), + .pSignalSemaphores = info.signal_semas.data(), + }; + + try { + instance.GetGraphicsQueue().submit(submit_info, info.fence); + } catch (vk::DeviceLostError& err) { + UNREACHABLE_MSG("Device lost during submit: {}", err.what()); + } + master_semaphore.Refresh(); AllocateWorkerCommandBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index b4504274..1e640b08 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -26,16 +26,39 @@ struct RenderState { } }; +struct SubmitInfo { + boost::container::static_vector wait_semas; + boost::container::static_vector wait_ticks; + boost::container::static_vector signal_semas; + boost::container::static_vector signal_ticks; + vk::Fence fence; + + void AddWait(vk::Semaphore semaphore, u64 tick = 1) { + wait_semas.emplace_back(semaphore); + wait_ticks.emplace_back(tick); + } + + void AddSignal(vk::Semaphore semaphore, u64 tick = 1) { + signal_semas.emplace_back(semaphore); + signal_ticks.emplace_back(tick); + } + + void AddSignal(vk::Fence fence) { + this->fence = fence; + } +}; + class Scheduler { public: explicit Scheduler(const Instance& instance); ~Scheduler(); - /// Sends the current execution context to the GPU. - void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + /// Sends the current execution context to the GPU + /// and increments the scheduler timeline semaphore. + void Flush(SubmitInfo& info); /// Sends the current execution context to the GPU and waits for it to complete.
- void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + void Finish(); /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick); @@ -76,12 +99,12 @@ public: pending_ops.emplace(func, CurrentTick()); } - std::mutex submit_mutex; + static std::mutex submit_mutex; private: void AllocateWorkerCommandBuffers(); - void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); + void SubmitExecution(SubmitInfo& info); private: const Instance& instance; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 7fffdeb2..20c99e30 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -55,7 +55,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { .pQueueFamilyIndices = queue_family_indices.data(), .preTransform = transform, .compositeAlpha = composite_alpha, - .presentMode = vk::PresentModeKHR::eFifo, + .presentMode = vk::PresentModeKHR::eMailbox, .clipped = true, .oldSwapchain = nullptr, }; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index e097ba3e..ace2e4d5 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -231,7 +231,7 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eT TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) : instance{instance}, scheduler{scheduler}, - staging{instance, scheduler, StagingFlags, 128_MB, Vulkan::BufferType::Upload} { + staging{instance, scheduler, StagingFlags, 256_MB, Vulkan::BufferType::Upload} { static const std::array detiler_shaders{ HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,