diff --git a/src/common/config.cpp b/src/common/config.cpp index 2da0844e..b7d99bbf 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -20,6 +20,7 @@ static bool isLibc = true; static bool isShowSplash = false; static bool isNullGpu = false; static bool shouldDumpShaders = false; +static bool shouldDumpPM4 = false; bool isLleLibc() { return isLibc; @@ -64,6 +65,10 @@ bool dumpShaders() { return shouldDumpShaders; } +bool dumpPM4() { + return shouldDumpPM4; +} + void load(const std::filesystem::path& path) { // If the configuration file does not exist, create it and return std::error_code error; @@ -102,6 +107,7 @@ void load(const std::filesystem::path& path) { gpuId = toml::find_or(gpu, "gpuId", 0); isNullGpu = toml::find_or(gpu, "nullGpu", false); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); + shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false); } } if (data.contains("Debug")) { @@ -149,6 +155,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["dumpShaders"] = shouldDumpShaders; + data["GPU"]["dumpPM4"] = shouldDumpPM4; data["Debug"]["DebugDump"] = isDebugDump; data["LLE"]["libc"] = isLibc; diff --git a/src/common/config.h b/src/common/config.h index 8a8db451..53925379 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -23,5 +23,6 @@ bool isLleLibc(); bool showSplash(); bool nullGpu(); bool dumpShaders(); +bool dumpPM4(); }; // namespace Config diff --git a/src/common/path_util.cpp b/src/common/path_util.cpp index 35ac4448..7210f212 100644 --- a/src/common/path_util.cpp +++ b/src/common/path_util.cpp @@ -35,6 +35,7 @@ static auto UserPaths = [] { create_path(PathType::LogDir, user_dir / LOG_DIR); create_path(PathType::ScreenshotsDir, user_dir / SCREENSHOTS_DIR); create_path(PathType::ShaderDir, user_dir / SHADER_DIR); + create_path(PathType::PM4Dir, user_dir / PM4_DIR); create_path(PathType::SaveDataDir, user_dir / SAVEDATA_DIR); create_path(PathType::SysModuleDir, user_dir / SYSMODULES_DIR); diff --git a/src/common/path_util.h b/src/common/path_util.h index ea457040..d7664871 100644 --- a/src/common/path_util.h +++ b/src/common/path_util.h @@ -13,6 +13,7 @@ enum class PathType { LogDir, // Where log files are stored. ScreenshotsDir, // Where screenshots are stored. ShaderDir, // Where shaders are stored. + PM4Dir, // Where command lists are stored. SaveDataDir, // Where guest save data is stored. SysModuleDir, // Where system modules are stored. }; @@ -23,6 +24,7 @@ constexpr auto PORTABLE_DIR = "user"; constexpr auto LOG_DIR = "log"; constexpr auto SCREENSHOTS_DIR = "screenshots"; constexpr auto SHADER_DIR = "shader"; +constexpr auto PM4_DIR = "pm4"; constexpr auto SAVEDATA_DIR = "savedata"; constexpr auto SYSMODULES_DIR = "sys_modules"; diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index a2358955..4e43544a 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "common/logging/log.h" +#include "common/path_util.h" #include "core/libraries/error_codes.h" #include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/libs.h" @@ -28,6 +30,21 @@ static constexpr bool g_fair_hw_init = false; // In case if `submitDone` is issued we need to block submissions until GPU idle static u32 submission_lock{}; +static u64 frames_submitted{}; // frame counter + +static void DumpCommandList(std::span cmd_list, const std::string& postfix) { + using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::PM4Dir); + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } + if (cmd_list.empty()) { + return; + } + const auto filename = std::format("{:08}_{}", frames_submitted, postfix); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteSpan(cmd_list); +} // Write a special ending NOP packet with N DWs data block template @@ -1439,7 +1456,25 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ const auto dcb_size_dw = dcb_sizes_in_bytes[cbpair] >> 2; const auto ccb_size_dw = ccb_size_in_bytes >> 2; - liverpool->SubmitGfx({dcb_gpu_addrs[cbpair], dcb_size_dw}, {ccb, ccb_size_dw}); + const auto& dcb_span = std::span{dcb_gpu_addrs[cbpair], dcb_size_dw}; + const auto& ccb_span = std::span{ccb, ccb_size_dw}; + + if (Config::dumpPM4()) { + static auto last_frame_num = frames_submitted; + static u32 seq_num{}; + if (last_frame_num == frames_submitted) { + ++seq_num; + } else { + last_frame_num = frames_submitted; + seq_num = 0u; + } + + // File name format is: __ + DumpCommandList(dcb_span, std::format("dcb_{}_{}", seq_num, cbpair)); + DumpCommandList(ccb_span, std::format("ccb_{}_{}", seq_num, cbpair)); + } + + liverpool->SubmitGfx(dcb_span, ccb_span); } return ORBIS_OK; @@ -1455,6 +1490,8 @@ int PS4_SYSV_ABI sceGnmSubmitDone() { if (!liverpool->IsGpuIdle()) { submission_lock = true; } + liverpool->NotifySubmitDone(); + ++frames_submitted; return ORBIS_OK; } diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 1216d61c..32f179a5 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -5,6 +5,7 @@ #include #include "common/assert.h" #include "common/logging/log.h" +#include "common/thread.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/thread_management.h" #include "core/libraries/libs.h" @@ -827,6 +828,7 @@ static void cleanup_thread(void* arg) { static void* run_thread(void* arg) { auto* thread = static_cast(arg); + Common::SetCurrentThreadName(thread->name.c_str()); void* ret = nullptr; g_pthread_self = thread; pthread_cleanup_push(cleanup_thread, thread); diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index de7c1236..4675fd60 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "core/libraries/app_content/app_content.h" #include "core/libraries/audio/audioin.h" #include "core/libraries/audio/audioout.h" #include "core/libraries/gnmdriver/gnmdriver.h" @@ -27,7 +28,6 @@ #include "core/libraries/system/systemservice.h" #include "core/libraries/system/userservice.h" #include "core/libraries/videoout/video_out.h" -#include "core/libraries/app_content/app_content.h" namespace Libraries { diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 316bcdd2..81248072 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -8,6 +8,7 @@ #include "common/logging/log.h" #include "common/path_util.h" #include "common/string_util.h" +#include "common/thread.h" #include "core/aerolib/aerolib.h" #include "core/aerolib/stubs.h" #include "core/libraries/kernel/thread_management.h" @@ -676,6 +677,8 @@ void Linker::Execute() { DebugDump(); } + Common::SetCurrentThreadName("GAME_MainThread"); + Libraries::Kernel::pthreadInitSelfMainThread(); // Relocate all modules for (const auto& m : m_modules) { diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 0bcc3486..478bc726 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -61,7 +61,11 @@ void Liverpool::Process(std::stop_token stoken) { --num_submits; } } - num_submits.notify_all(); + + if (submit_done) { + num_submits.notify_all(); + submit_done = false; + } } } @@ -163,8 +167,61 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - std::memcpy(®s.reg_array[ContextRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto reg_addr = ContextRegWordOffset + set_data->reg_offset; + const auto* payload = reinterpret_cast(header + 2); + + std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); + + // In the case of HW, render target memory has alignment as color block operates on + // tiles. There is no information of actual resource extents stored in CB context + // regs, so any deduction of it from slices/pitch will lead to a larger surface created. + // The same applies to the depth targets. Fortunatelly, the guest always sends + // a trailing NOP packet right after the context regs setup, so we can use the heuristic + // below and extract the hint to determine actual resource dims. + + switch (reg_addr) { + case ContextRegs::CbColor0Base: + [[fallthrough]]; + case ContextRegs::CbColor1Base: + [[fallthrough]]; + case ContextRegs::CbColor2Base: + [[fallthrough]]; + case ContextRegs::CbColor3Base: + [[fallthrough]]; + case ContextRegs::CbColor4Base: + [[fallthrough]]; + case ContextRegs::CbColor5Base: + [[fallthrough]]; + case ContextRegs::CbColor6Base: + [[fallthrough]]; + case ContextRegs::CbColor7Base: { + const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) / + (ContextRegs::CbColor1Base - ContextRegs::CbColor0Base); + ASSERT(col_buf_id < NumColorBuffers); + + const auto nop_offset = header->type3.count; + if (nop_offset == 0x0e) { + ASSERT_MSG(payload[nop_offset] == 0xc0001000, + "NOP hint is missing in CB setup sequence"); + last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; + } else { + last_cb_extent[col_buf_id].raw = 0; + } + break; + } + case ContextRegs::DbZInfo: { + if (header->type3.count == 8) { + ASSERT_MSG(payload[20] == 0xc0001000, + "NOP hint is missing in DB setup sequence"); + last_db_extent.raw = payload[21]; + } else { + last_db_extent.raw = 0; + } + break; + } + default: + break; + } break; } case PM4ItOpcode::SetShReg: { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9c2b4bcd..c52a0f97 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -682,6 +682,18 @@ struct Liverpool { Polygon = 21, }; + enum ContextRegs : u32 { + DbZInfo = 0xA010, + CbColor0Base = 0xA318, + CbColor1Base = 0xA327, + CbColor2Base = 0xA336, + CbColor3Base = 0xA345, + CbColor4Base = 0xA354, + CbColor5Base = 0xA363, + CbColor6Base = 0xA372, + CbColor7Base = 0xA381, + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -765,6 +777,21 @@ struct Liverpool { Regs regs{}; + // See for a comment in context reg parsing code + union CbDbExtent { + struct { + u16 width; + u16 height; + }; + u32 raw{0u}; + + [[nodiscard]] bool Valid() const { + return raw != 0; + } + }; + std::array last_cb_extent{}; + CbDbExtent last_db_extent{}; + public: Liverpool(); ~Liverpool(); @@ -777,6 +804,11 @@ public: return num_submits == 0; } + void NotifySubmitDone() { + submit_done = true; + num_submits.notify_all(); + } + void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; } @@ -841,6 +873,7 @@ private: Vulkan::Rasterizer* rasterizer{}; std::jthread process_thread{}; std::atomic num_submits{}; + std::atomic submit_done{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d0b873fa..37d6f72b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -41,11 +41,14 @@ void Rasterizer::Draw(bool is_indexed) { boost::container::static_vector color_attachments{}; - for (const auto& col_buf : regs.color_buffers) { + for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { + const auto& col_buf = regs.color_buffers[col_buf_id]; if (!col_buf) { continue; } - const auto& image_view = texture_cache.RenderTarget(col_buf); + + const auto& hint = liverpool->last_cb_extent[col_buf_id]; + const auto& image_view = texture_cache.RenderTarget(col_buf, hint); color_attachments.push_back({ .imageView = *image_view.image_view, diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index efe3cf3d..cc29f010 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -82,12 +82,13 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } } -ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { +ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { is_tiled = true; pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); type = vk::ImageType::e2D; - size.width = buffer.Pitch(); - size.height = buffer.Height(); + size.width = hint.Valid() ? hint.width : buffer.Pitch(); + size.height = hint.Valid() ? hint.height : buffer.Height(); size.depth = 1; pitch = size.width; guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1); diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 2128d098..cc3adff4 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -34,7 +34,8 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) struct ImageInfo { ImageInfo() = default; explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; - explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; explicit ImageInfo(const AmdGpu::Image& image) noexcept; bool is_tiled = false; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index e42a0bbd..be4bf907 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -101,6 +101,7 @@ TextureCache::~TextureCache() { } void TextureCache::OnCpuWrite(VAddr address) { + std::unique_lock lock{m_page_table}; ForEachImageInRegion(address, 1 << PageShift, [&](ImageId image_id, Image& image) { // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; @@ -110,6 +111,7 @@ void TextureCache::OnCpuWrite(VAddr address) { } Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { + std::unique_lock lock{m_page_table}; boost::container::small_vector image_ids; ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { if (image.cpu_addr == cpu_address) { @@ -151,8 +153,9 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) { return slot_image_views[view_id]; } -ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) { - const ImageInfo info{buffer}; +ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint) { + const ImageInfo info{buffer, hint}; auto& image = FindImage(info, buffer.Address()); ImageViewInfo view_info; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index bb37677d..94c49929 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -42,7 +42,8 @@ public: ImageView& FindImageView(const AmdGpu::Image& image); /// Retrieves the render target with specified properties - ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer); + ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint); /// Reuploads image contents. void RefreshImage(Image& image); @@ -136,6 +137,7 @@ private: #ifdef _WIN64 void* veh_handle{}; #endif + std::mutex m_page_table; }; } // namespace VideoCore