diff --git a/src/common/debug.h b/src/common/debug.h index ea1dff7d..f9974228 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -51,4 +51,7 @@ enum MarkersPallete : int { #define GPU_SCOPE_LOCATION(name, color) \ tracy::SourceLocationData{name, TracyFunction, TracyFile, (uint32_t)TracyLine, color}; +#define MUTEX_LOCATION(name) \ + tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0}; + #define FRAME_END FrameMark diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index fb829ce5..05ae882f 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -622,7 +622,6 @@ int PS4_SYSV_ABI sceGnmGetShaderStatus() { VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() { LOG_TRACE(Lib_GnmDriver, "called"); - // Actual virtual buffer address is hardcoded in the driver to 0xff00'000 return tessellation_factors_ring_addr; } @@ -964,15 +963,16 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, 0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1 0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0 0x7e020b01u, // v_cvt_f32_i32 v1, v1 - 0x7E000B00U, - 0x7e040280u, // v_cvt_f32_i32 v0, v0 + 0x7e000b00U, // v_cvt_f32_i32 v0, v0 + 0x7e040280u, // v_mov_b32 v2, 0 0x7e0602f2u, // v_mov_b32 v3, 1.0 0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done 0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3 0xbf810000u, // s_endpgm - // OrbShdr header + // Binary header 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, + // VS regs 0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u, }; // clang-format on @@ -1512,9 +1512,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ const auto& ccb_span = std::span{ccb, ccb_size_dw}; if (Config::dumpPM4()) { - static auto last_frame_num = frames_submitted; + static auto last_frame_num = -1LL; static u32 seq_num{}; - if (last_frame_num && last_frame_num == frames_submitted) { + if (last_frame_num == frames_submitted) { ++seq_num; } else { last_frame_num = frames_submitted; diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 804e18c5..ae4f7582 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -429,7 +429,11 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr); + static auto mutex_loc = MUTEX_LOCATION("mutex"); + (*mutex)->tracy_lock = std::make_unique(&mutex_loc); + if (name != nullptr) { + (*mutex)->tracy_lock->CustomName(name, std::strlen(name)); LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result); } @@ -526,7 +530,11 @@ int PS4_SYSV_ABI scePthreadMutexattrSetprotocol(ScePthreadMutexattr* attr, int p UNREACHABLE_MSG("Invalid protocol: {}", protocol); } +#if _WIN64 + int result = 0; +#else int result = pthread_mutexattr_setprotocol(&(*attr)->pth_mutex_attr, pprotocol); +#endif (*attr)->pprotocol = pprotocol; return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL; } @@ -537,10 +545,15 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) { return SCE_KERNEL_ERROR_EINVAL; } + (*mutex)->tracy_lock->BeforeLock(); + int result = pthread_mutex_lock(&(*mutex)->pth_mutex); if (result != 0) { LOG_TRACE(Kernel_Pthread, "Locked name={}, result={}", (*mutex)->name, result); } + + (*mutex)->tracy_lock->AfterLock(); + switch (result) { case 0: return SCE_OK; @@ -565,6 +578,9 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) { if (result != 0) { LOG_TRACE(Kernel_Pthread, "Unlocking name={}, result={}", (*mutex)->name, result); } + + (*mutex)->tracy_lock->AfterUnlock(); + switch (result) { case 0: return SCE_OK; @@ -1095,6 +1111,9 @@ int PS4_SYSV_ABI scePthreadMutexTrylock(ScePthreadMutex* mutex) { if (result != 0) { LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); } + + (*mutex)->tracy_lock->AfterTryLock(result == 0); + switch (result) { case 0: return ORBIS_OK; diff --git a/src/core/libraries/kernel/thread_management.h b/src/core/libraries/kernel/thread_management.h index 1909ffd3..f7124cff 100644 --- a/src/core/libraries/kernel/thread_management.h +++ b/src/core/libraries/kernel/thread_management.h @@ -9,6 +9,7 @@ #include #include #include +#include "common/debug.h" #include "common/types.h" namespace Core::Loader { @@ -72,6 +73,7 @@ struct PthreadMutexInternal { u8 reserved[256]; std::string name; pthread_mutex_t pth_mutex; + std::unique_ptr tracy_lock; }; struct PthreadMutexattrInternal { diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index e86fb1ed..c7677252 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -134,6 +134,7 @@ int VideoOutDriver::RegisterBuffers(VideoOutPort* port, s32 startIndex, void* co .address_right = 0, }; + renderer->RegisterVideoOutSurface(group, address); LOG_INFO(Lib_VideoOut, "buffers[{}] = {:#x}", i + startIndex, address); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index dd717990..b4a71997 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -49,7 +49,9 @@ struct Liverpool { using UserData = std::array; struct BinaryInfo { - u8 signature[7]; + static constexpr u8 signature_ref[] = {0x4f, 0x72, 0x62, 0x53, 0x68, 0x64, 0x72}; // OrbShdr + + std::array signature; u8 version; u32 pssl_or_cg : 1; u32 cached : 1; @@ -65,6 +67,11 @@ struct Liverpool { u8 reserved3; u64 shader_hash; u32 crc32; + + bool Valid() const { + return shader_hash && crc32 && + (std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0); + } }; struct ShaderProgram { @@ -134,6 +141,14 @@ struct Liverpool { } }; + template + static constexpr auto* GetBinaryInfo(const Shader& sh) { + const auto* code = sh.template Address(); + const auto* bininfo = std::bit_cast(code + (code[1] + 1) * 2); + ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header"); + return bininfo; + } + union PsInputControl { u32 raw; BitField<0, 5, u32> input_offset; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 0096d34b..feb39a30 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -277,6 +277,7 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color) { } vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { return vk::Format::eR32G32B32A32Sfloat; @@ -291,7 +292,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu } if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { - return vk::Format::eB8G8R8A8Srgb; + return vk::Format::eR8G8B8A8Srgb; } if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Float) { @@ -353,6 +354,31 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } +vk::Format AdjustColorBufferFormat(vk::Format base_format, + Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) { + ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard || + comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate, + "Unsupported component swap mode {}", static_cast(comp_swap)); + + const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; + + switch (base_format) { + case vk::Format::eR8G8B8A8Unorm: + return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm : base_format; + case vk::Format::eB8G8R8A8Unorm: + return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm : base_format; + case vk::Format::eR8G8B8A8Srgb: + return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm + : is_vo_surface ? vk::Format::eR8G8B8A8Unorm + : base_format; + case vk::Format::eB8G8R8A8Srgb: + return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm + : is_vo_surface ? vk::Format::eB8G8R8A8Unorm + : base_format; + } + UNREACHABLE_MSG("Unsupported base format {}", vk::to_string(base_format)); +} + vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) { if (z_format == DepthBuffer::ZFormat::Z32Float && stencil_format == DepthBuffer::StencilFormat::Stencil8) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 27569c4f..59df89b4 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -40,6 +40,9 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); +vk::Format AdjustColorBufferFormat(vk::Format base_format, + Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface); + vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, Liverpool::DepthBuffer::StencilFormat stencil_format); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index dc986249..fb6bc248 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -192,19 +192,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { return true; } -Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, - VAddr cpu_address) { - // Request presentation image from the texture cache. - const auto info = VideoCore::ImageInfo{attribute}; - auto& image = texture_cache.FindImage(info, cpu_address); - return PrepareFrameInternal(image); -} - -Frame* RendererVulkan::PrepareBlankFrame() { - auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); - return PrepareFrameInternal(image); -} - Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { // Request a free presentation frame. Frame* frame = GetRenderFrame(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index f4b1a608..a0fb6d4a 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -4,6 +4,7 @@ #pragma once #include +#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" @@ -38,8 +39,28 @@ public: ~RendererVulkan(); Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, - VAddr cpu_address); - Frame* PrepareBlankFrame(); + VAddr cpu_address) { + auto& image = RegisterVideoOutSurface(attribute, cpu_address); + return PrepareFrameInternal(image); + } + + Frame* PrepareBlankFrame() { + auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); + return PrepareFrameInternal(image); + } + + VideoCore::Image& RegisterVideoOutSurface( + const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { + vo_buffers_addr.emplace_back(cpu_address); + const auto info = VideoCore::ImageInfo{attribute}; + return texture_cache.FindImage(info, cpu_address); + } + + bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { + return std::find_if(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(), [&](VAddr vo_buffer) { + return vo_buffer == color_buffer.Address(); + }) != vo_buffers_addr.cend(); + } bool ShowSplash(Frame* frame = nullptr); void Present(Frame* frame); @@ -63,6 +84,7 @@ private: std::condition_variable free_cv; std::condition_variable_any frame_cv; std::optional splash_img; + std::vector vo_buffers_addr; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7d1a980c..0d59d13f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -95,8 +95,9 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s const u32 size = vsharp.GetSize(); const VAddr addr = vsharp.base_address.Value(); texture_cache.OnCpuWrite(addr); - const u32 offset = - staging.Copy(addr, size, buffer.is_storage ? 4 : instance.UniformMinAlignment()); + const u32 offset = staging.Copy(addr, size, + buffer.is_storage ? instance.StorageMinAlignment() + : instance.UniformMinAlignment()); // const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr); buffer_infos.emplace_back(staging.Handle(), offset, size); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4a811eba..787fb774 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -327,7 +327,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& const auto vsharp = stage.ReadUd(buffer.sgpr_base, buffer.dword_offset); const u32 size = vsharp.GetSize(); const u32 offset = staging.Copy(vsharp.base_address.Value(), size, - buffer.is_storage ? 4 : instance.UniformMinAlignment()); + buffer.is_storage ? instance.StorageMinAlignment() + : instance.UniformMinAlignment()); buffer_infos.emplace_back(staging.Handle(), offset, size); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, @@ -399,7 +400,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const { }; // Calculate buffers memory overlaps - std::vector ranges{}; + boost::container::static_vector ranges{}; for (const auto& input : vs_info.vs_inputs) { const auto& buffer = guest_buffers.emplace_back( vs_info.ReadUd(input.sgpr_base, input.dword_offset)); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4b38aa3d..cccd35e3 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -71,7 +71,7 @@ public: } [[nodiscard]] bool IsEmbeddedVs() const noexcept { - static constexpr size_t EmbeddedVsHash = 0x59c556606a027efd; + static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; return key.stage_hashes[0] == EmbeddedVsHash; } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 06a47675..afaf3005 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -213,6 +213,7 @@ bool Instance::CreateDevice() { }, vk::PhysicalDeviceVulkan12Features{ .scalarBlockLayout = true, + .uniformBufferStandardLayout = true, .hostQueryReset = true, .timelineSemaphore = true, }, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index f8e3c2e9..ad6196ab 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -169,6 +169,11 @@ public: return properties.limits.minUniformBufferOffsetAlignment; } + /// Returns the minimum required alignment for storage buffers + vk::DeviceSize StorageMinAlignment() const { + return properties.limits.minStorageBufferOffsetAlignment; + } + /// Returns the minimum alignemt required for accessing host-mapped device memory vk::DeviceSize NonCoherentAtomSize() const { return properties.limits.nonCoherentAtomSize; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 139f7715..7d5a839b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include "common/config.h" #include "common/io_file.h" #include "common/path_util.h" @@ -9,11 +8,14 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +extern std::unique_ptr renderer; + namespace Vulkan { Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, @@ -74,8 +76,8 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { const ComputePipeline* PipelineCache::GetComputePipeline() { const auto& cs_pgm = liverpool->regs.cs_program; ASSERT(cs_pgm.Address() != nullptr); - const auto code = cs_pgm.Code(); - compute_key = XXH3_64bits(code.data(), code.size_bytes()); + const auto* bininfo = Liverpool::GetBinaryInfo(cs_pgm); + compute_key = bininfo->shader_hash; const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); if (is_new) { it.value() = CreateComputePipeline(); @@ -130,8 +132,11 @@ void PipelineCache::RefreshGraphicsKey() { if (!col_buf) { continue; } - key.color_formats[remapped_cb] = + const auto base_format = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); + const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf); + key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( + base_format, col_buf.info.comp_swap.Value(), is_vo_surface); key.blend_controls[remapped_cb] = regs.blend_control[cb]; key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); @@ -147,8 +152,8 @@ void PipelineCache::RefreshGraphicsKey() { key.stage_hashes[i] = 0; continue; } - const auto code = pgm->Code(); - key.stage_hashes[i] = XXH3_64bits(code.data(), code.size_bytes()); + const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); + key.stage_hashes[i] = bininfo->shader_hash; } } @@ -243,7 +248,7 @@ void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stag if (!std::filesystem::exists(dump_dir)) { std::filesystem::create_directories(dump_dir); } - const auto filename = fmt::format("{}_{:#X}.{}", stage, hash, ext); + const auto filename = fmt::format("{}_{:#018x}.{}", stage, hash, ext); const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; file.WriteSpan(code); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index f81514d6..7fffdeb2 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -124,9 +124,9 @@ void Swapchain::FindPresentFormat() { const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface); // If there is a single undefined surface format, the device doesn't care, so we'll just use - // RGBA. + // RGBA sRGB. if (formats[0].format == vk::Format::eUndefined) { - surface_format.format = vk::Format::eR8G8B8A8Unorm; + surface_format.format = vk::Format::eR8G8B8A8Srgb; surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear; return; } @@ -134,7 +134,7 @@ void Swapchain::FindPresentFormat() { // Try to find a suitable format. for (const vk::SurfaceFormatKHR& sformat : formats) { vk::Format format = sformat.format; - if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) { + if (format != vk::Format::eR8G8B8A8Srgb && format != vk::Format::eB8G8R8A8Srgb) { continue; } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index b464f3d7..62ffdd1c 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -22,7 +22,7 @@ static vk::Format ConvertPixelFormat(const VideoOutFormat format) { case VideoOutFormat::A8R8G8B8Srgb: return vk::Format::eB8G8R8A8Srgb; case VideoOutFormat::A8B8G8R8Srgb: - return vk::Format::eA8B8G8R8SrgbPack32; + return vk::Format::eR8G8B8A8Srgb; case VideoOutFormat::A2R10G10B10: case VideoOutFormat::A2R10G10B10Srgb: return vk::Format::eA2R10G10B10UnormPack32; @@ -57,6 +57,17 @@ bool ImageInfo::IsBlockCoded() const { } } +bool ImageInfo::IsPacked() const { + switch (pixel_format) { + case vk::Format::eB5G5R5A1UnormPack16: + [[fallthrough]]; + case vk::Format::eB5G6R5UnormPack16: + return true; + default: + return false; + } +} + bool ImageInfo::IsDepthStencil() const { switch (pixel_format) { case vk::Format::eD16Unorm: @@ -76,7 +87,7 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) { if (info.IsDepthStencil()) { usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; } else { - if (!info.IsBlockCoded()) { + if (!info.IsBlockCoded() && !info.IsPacked()) { usage |= vk::ImageUsageFlagBits::eColorAttachment; } } @@ -110,8 +121,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe size.width = attrib.width; size.height = attrib.height; pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7; - const bool is_32bpp = pixel_format == vk::Format::eB8G8R8A8Srgb || - pixel_format == vk::Format::eA8B8G8R8SrgbPack32; + const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float; ASSERT(is_32bpp); if (!is_tiled) { guest_size_bytes = pitch * size.height * 4; @@ -122,6 +132,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } else { guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4; } + is_vo_surface = true; } ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 1b577046..f54a796f 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -43,10 +43,12 @@ struct ImageInfo { explicit ImageInfo(const AmdGpu::Image& image) noexcept; bool IsBlockCoded() const; + bool IsPacked() const; bool IsDepthStencil() const; bool is_tiled = false; bool is_storage = false; + bool is_vo_surface = false; vk::Format pixel_format = vk::Format::eUndefined; vk::ImageType type = vk::ImageType::e1D; vk::ImageUsageFlags usage; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 59fb47b5..72566a88 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -62,6 +62,14 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce } } +ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, + bool is_vo_surface) noexcept { + const auto base_format = + Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); + format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( + base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); +} + ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, std::optional usage_override /*= {}*/) : info{info_} { diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 83936acc..6b567ed1 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -3,6 +3,7 @@ #pragma once +#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/types.h" @@ -19,6 +20,8 @@ namespace VideoCore { struct ImageViewInfo { explicit ImageViewInfo() = default; explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; + explicit ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, + bool is_vo_surface) noexcept; vk::ImageViewType type = vk::ImageViewType::e2D; vk::Format format = vk::Format::eR8G8B8A8Unorm; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8c910c03..526dfa47 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -183,8 +183,7 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eColorAttachmentRead); - ImageViewInfo view_info; - view_info.format = info.pixel_format; + ImageViewInfo view_info{buffer, image.info.is_vo_surface}; return RegisterImageView(image, view_info); } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 36e1d1e1..3d1cc5c5 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -178,8 +178,12 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { case vk::Format::eR8Unorm: return vk::Format::eR8Uint; + case vk::Format::eR8G8B8A8Srgb: + [[fallthrough]]; case vk::Format::eB8G8R8A8Srgb: [[fallthrough]]; + case vk::Format::eB8G8R8A8Unorm: + [[fallthrough]]; case vk::Format::eR8G8B8A8Unorm: return vk::Format::eR32Uint; case vk::Format::eBc1RgbaUnormBlock: @@ -315,7 +319,8 @@ bool TileManager::TryDetile(Image& image) { return false; } - const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4); + const auto offset = + staging.Copy(image.cpu_addr, image.info.guest_size_bytes, instance.StorageMinAlignment()); image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); auto cmdbuf = scheduler.CommandBuffer();