From 2cbbcbd3716a4f749659ffb3d7429f6b90723c03 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sat, 29 Jun 2024 15:49:59 +0200 Subject: [PATCH] Metadata support (#223) * texture_cache: more image usage flags * texture_cache: metadata registration * renderer_vulkan: initial CMask support * renderer_vulkan: skip redundant FCE and FMask decompression passes * renderer_vulkan: redundant VO surface registration removed * renderer_vulkan: initial HTile support * renderer_vulkan: added support for MSAA attachments * renderer_vulkan: skip unnecessary metadata updates --- .../ir/passes/resource_tracking_pass.cpp | 16 ++-- src/video_core/amdgpu/liverpool.h | 55 ++++++++++++-- .../renderer_vulkan/liverpool_to_vk.cpp | 73 +++++++++++++++++++ .../renderer_vulkan/liverpool_to_vk.h | 4 + .../renderer_vulkan/renderer_vulkan.h | 3 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 38 +++++++--- .../renderer_vulkan/vk_compute_pipeline.h | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 13 +++- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + .../renderer_vulkan/vk_instance.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 21 +++++- .../renderer_vulkan/vk_rasterizer.cpp | 31 ++++++-- src/video_core/texture_cache/image.cpp | 9 ++- src/video_core/texture_cache/image.h | 22 ++++-- .../texture_cache/texture_cache.cpp | 57 ++++++++++++++- src/video_core/texture_cache/texture_cache.h | 37 ++++++++++ 16 files changed, 336 insertions(+), 47 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 8e1c186c..ac419197 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -294,15 +294,19 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip const IR::Inst* body = inst.Arg(1).InstRecursive(); const auto [coords, arg] = [&] -> std::pair { 
switch (image.GetType()) { - case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color1D: // x return {body->Arg(0), body->Arg(1)}; - case AmdGpu::ImageType::Color1DArray: - case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color1DArray: // x, slice + [[fallthrough]]; + case AmdGpu::ImageType::Color2D: // x, y return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)}; - case AmdGpu::ImageType::Color2DArray: - case AmdGpu::ImageType::Color3D: + case AmdGpu::ImageType::Color2DArray: // x, y, slice + [[fallthrough]]; + case AmdGpu::ImageType::Color2DMsaa: // x, y, frag + [[fallthrough]]; + case AmdGpu::ImageType::Color3D: // x, y, z return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; - case AmdGpu::ImageType::Cube: + case AmdGpu::ImageType::Cube: // x, y, face return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", image.GetType()); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c5966305..bcc1187a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -324,6 +324,10 @@ struct Liverpool { BitField<0, 2, ZFormat> format; BitField<2, 2, u32> num_samples; BitField<13, 3, u32> tile_split; + BitField<27, 1, u32> allow_expclear; + BitField<28, 1, u32> read_size; + BitField<29, 1, u32> tile_surface_en; + BitField<31, 1, u32> zrange_precision; } z_info; union { BitField<0, 1, StencilFormat> format; @@ -352,7 +356,7 @@ struct Liverpool { return u64(z_read_base) << 8; } - [[nodiscard]] size_t GetSizeAligned() const { + size_t GetSizeAligned() const { return depth_slice.tile_max * 8; } }; @@ -606,6 +610,20 @@ struct Liverpool { BitField<30, 1, u32> enable; }; + union ColorControl { + enum class OperationMode : u32 { + Disable = 0u, + Normal = 1u, + EliminateFastClear = 2u, + Resolve = 3u, + FmaskDecompress = 5u, + }; + + BitField<3, 1, u32> degamma_enable; + 
BitField<4, 3, OperationMode> mode; + BitField<16, 8, u32> rop3; + }; + struct ColorBuffer { enum class EndianSwap : u32 { None = 0, @@ -688,11 +706,15 @@ struct Liverpool { return u64(base_address) << 8; } - u64 CmaskAddress() const { - return u64(cmask_base_address) << 8; + VAddr CmaskAddress() const { + return VAddr(cmask_base_address) << 8; } - [[nodiscard]] size_t GetSizeAligned() const { + VAddr FmaskAddress() const { + return VAddr(fmask_base_address) << 8; + } + + size_t GetSizeAligned() const { const auto num_bytes_per_element = NumBits(info.format) / 8u; const auto slice_size = (slice.tile_max + 1) * 64u; const auto total_size = slice_size * (view.slice_max + 1) * num_bytes_per_element; @@ -700,11 +722,11 @@ struct Liverpool { return total_size; } - [[nodiscard]] TilingMode GetTilingMode() const { + TilingMode GetTilingMode() const { return attrib.tile_mode_index; } - [[nodiscard]] bool IsTiled() const { + bool IsTiled() const { return !info.linear_general; } @@ -769,6 +791,18 @@ struct Liverpool { BitField<1, 1, u32> stencil_clear_enable; }; + union AaConfig { + BitField<0, 3, u32> msaa_num_samples; + BitField<4, 1, u32> aa_mask_centroid_dtmn; + BitField<13, 4, u32> max_sample_dst; + BitField<20, 3, u32> msaa_exposed_samples; + BitField<24, 2, u32> detail_to_exposed_mode; + + u32 NumSamples() const { + return 1 << msaa_num_samples; + } + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -821,7 +855,8 @@ struct Liverpool { u32 draw_initiator; INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4); DepthControl depth_control; - INSERT_PADDING_WORDS(2); + INSERT_PADDING_WORDS(1); + ColorControl color_control; DepthBufferControl depth_buffer_control; ClipperControl clipper_control; PolygonControl polygon_control; @@ -835,7 +870,9 @@ struct Liverpool { u32 enable_primitive_id; INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1); PolygonOffset poly_offset; - INSERT_PADDING_WORDS(0xA318 - 0xA2DF - 5); + INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); + AaConfig aa_config; + 
INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1); ColorBuffer color_buffers[NumColorBuffers]; INSERT_PADDING_WORDS(0xC242 - 0xA390); PrimitiveType primitive_type; @@ -991,6 +1028,7 @@ static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0); static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9); static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC); static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200); +static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); @@ -998,6 +1036,7 @@ static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); +static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 8e3f9207..6594aab9 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "video_core/amdgpu/pixel_format.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" namespace Vulkan::LiverpoolToVK { @@ -381,6 +382,13 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, case vk::Format::eB8G8R8A8Srgb: return is_vo_surface ? 
vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb; } + } else { + if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) { + return vk::Format::eR8G8B8A8Unorm; + } + if (is_vo_surface && base_format == vk::Format::eB8G8R8A8Srgb) { + return vk::Format::eB8G8R8A8Unorm; + } } return base_format; } @@ -422,4 +430,69 @@ void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { } } +static constexpr float U8ToUnorm(u8 v) { + static constexpr auto c = 1.0f / 255.0f; + return float(v * c); +} + +vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { + const auto comp_swap = color_buffer.info.comp_swap.Value(); + ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard || + comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate, + "Unsupported component swap mode {}", static_cast(comp_swap)); + + const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; + + const auto& c0 = color_buffer.clear_word0; + const auto& c1 = color_buffer.clear_word1; + const auto num_bits = AmdGpu::NumBits(color_buffer.info.format); + + vk::ClearColorValue color{}; + switch (color_buffer.info.number_type) { + case AmdGpu::NumberFormat::Snorm: + [[fallthrough]]; + case AmdGpu::NumberFormat::SnormNz: + [[fallthrough]]; + case AmdGpu::NumberFormat::Unorm: + [[fallthrough]]; + case AmdGpu::NumberFormat::Srgb: { + switch (num_bits) { + case 32: { + color.float32 = std::array{ + U8ToUnorm((c0 >> (comp_swap_alt ? 16 : 0)) & 0xff), + U8ToUnorm((c0 >> 8) & 0xff), + U8ToUnorm((c0 >> (comp_swap_alt ? 
0 : 16)) & 0xff), + U8ToUnorm((c0 >> 24) & 0xff), + }; + break; + } + default: { + LOG_ERROR(Render_Vulkan, "Missing clear color conversion for bits {}", num_bits); + break; + } + } + break; + } + default: { + LOG_ERROR(Render_Vulkan, "Missing clear color conversion for type {}", + color_buffer.info.number_type.Value()); + break; + } + } + return {.color = color}; +} + +vk::SampleCountFlagBits NumSamples(u32 num_samples) { + switch (num_samples) { + case 1: + return vk::SampleCountFlagBits::e1; + case 2: + return vk::SampleCountFlagBits::e2; + case 4: + return vk::SampleCountFlagBits::e4; + default: + UNREACHABLE(); + } +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 59df89b4..aae39630 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -46,6 +46,10 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, Liverpool::DepthBuffer::StencilFormat stencil_format); +vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer); + +vk::SampleCountFlagBits NumSamples(u32 num_samples); + void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index a0fb6d4a..523ff05b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -40,7 +40,8 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { - auto& image = RegisterVideoOutSurface(attribute, cpu_address); + const auto info = VideoCore::ImageInfo{attribute}; + auto& image = texture_cache.FindImage(info, cpu_address); return PrepareFrameInternal(image); } diff --git 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 0d59d13f..0de0f425 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -82,7 +82,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler ComputePipeline::~ComputePipeline() = default; -void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging, +bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging, VideoCore::TextureCache& texture_cache) const { // Bind resource buffers and textures. boost::container::static_vector buffer_infos; @@ -93,12 +93,11 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s for (const auto& buffer : info.buffers) { const auto vsharp = info.ReadUd(buffer.sgpr_base, buffer.dword_offset); const u32 size = vsharp.GetSize(); - const VAddr addr = vsharp.base_address.Value(); - texture_cache.OnCpuWrite(addr); - const u32 offset = staging.Copy(addr, size, + const VAddr address = vsharp.base_address.Value(); + texture_cache.OnCpuWrite(address); + const u32 offset = staging.Copy(address, size, buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment()); - // const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr); buffer_infos.emplace_back(staging.Handle(), offset, size); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, @@ -109,6 +108,21 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); + + // Most of the time when a metadata is updated with a shader it gets cleared. It means we + // can skip the whole dispatch and update the tracked state instead. Also, it is not + // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will + // need its full emulation anyways. 
For cases of metadata read a warning will be logged. + if (buffer.is_storage) { + if (texture_cache.TouchMeta(address, true)) { + LOG_TRACE(Render_Vulkan, "Metadata update skipped"); + return false; + } + } else { + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + } + } } for (const auto& image : info.images) { @@ -124,6 +138,10 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s : vk::DescriptorType::eSampledImage, .pImageInfo = &image_infos.back(), }); + + if (texture_cache.IsMeta(tsharp.Address())) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)"); + } } for (const auto& sampler : info.samplers) { const auto ssharp = info.ReadUd(sampler.sgpr_base, sampler.dword_offset); @@ -139,11 +157,13 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s }); } - if (!set_writes.empty()) { - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, - set_writes); + if (set_writes.empty()) { + return false; } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes); + return true; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 781bd81b..1d074814 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -31,7 +31,7 @@ public: return *pipeline; } - void BindResources(Core::MemoryManager* memory, StreamBuffer& staging, + bool BindResources(Core::MemoryManager* memory, StreamBuffer& staging, VideoCore::TextureCache& texture_cache) const; private: diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 
b3401ec1..0fd7e5e5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -92,7 +92,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }; const vk::PipelineMultisampleStateCreateInfo multisampling = { - .rasterizationSamples = vk::SampleCountFlagBits::e1, + .rasterizationSamples = LiverpoolToVK::NumSamples(key.num_samples), .sampleShadingEnable = false, }; @@ -327,8 +327,9 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& for (const auto& stage : stages) { for (const auto& buffer : stage.buffers) { const auto vsharp = stage.ReadUd(buffer.sgpr_base, buffer.dword_offset); + const VAddr address = vsharp.base_address.Value(); const u32 size = vsharp.GetSize(); - const u32 offset = staging.Copy(vsharp.base_address.Value(), size, + const u32 offset = staging.Copy(address, size, buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment()); buffer_infos.emplace_back(staging.Handle(), offset, size); @@ -341,6 +342,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); + + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)"); + } } for (const auto& image : stage.images) { @@ -357,6 +362,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& : vk::DescriptorType::eSampledImage, .pImageInfo = &image_infos.back(), }); + + if (texture_cache.IsMeta(tsharp.Address())) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); + } } for (const auto& sampler : stage.samplers) { const auto ssharp = diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index cccd35e3..060a2695 100644 --- 
a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -38,6 +38,7 @@ struct GraphicsPipelineKey { float depth_bias_slope_factor; float depth_bias_clamp; u32 depth_bias_enable; + u32 num_samples = 1; Liverpool::StencilControl stencil; Liverpool::StencilRefMask stencil_ref_front; Liverpool::StencilRefMask stencil_ref_back; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index afaf3005..ecc27314 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -205,6 +205,7 @@ bool Instance::CreateDevice() { .logicOp = features.logicOp, .samplerAnisotropy = features.samplerAnisotropy, .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics, + .shaderStorageImageMultisample = true, .shaderClipDistance = features.shaderClipDistance, }, }, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cb72edaf..a6d4b770 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,12 +114,18 @@ void PipelineCache::RefreshGraphicsKey() { key.cull_mode = regs.polygon_control.CullingMode(); key.clip_space = regs.clipper_control.clip_space; key.front_face = regs.polygon_control.front_face; + key.num_samples = regs.aa_config.NumSamples(); const auto& db = regs.depth_buffer; if (key.depth.depth_enable) { key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format); key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); } + + // TODO: Should be a check for `OperationMode::Disable` once we emulate HW state init packet + // sent by system software. + const auto skip_cb_binding = false; + // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color // attachments. 
This might be not a case as HW color buffers can be bound in an arbitrary order. // We need to do some arrays compaction at this stage @@ -129,7 +135,7 @@ void PipelineCache::RefreshGraphicsKey() { int remapped_cb{}; for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; - if (!col_buf) { + if (!col_buf || skip_cb_binding) { continue; } const auto base_format = @@ -160,6 +166,19 @@ void PipelineCache::RefreshGraphicsKey() { std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const auto& regs = liverpool->regs; + // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an + // actual draw hence can skip pipeline creation. + if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { + LOG_TRACE(Render_Vulkan, "FCE pass skipped"); + return {}; + } + + if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) { + // TODO: check for a valid MRT1 to promote the draw to the resolve pass. 
+ LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped"); + return {}; + } + u32 binding{}; std::array programs; std::array infos{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 85a051f8..d378bd15 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -40,12 +40,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { const auto& regs = liverpool->regs; const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset); const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); + if (!pipeline) { + return; + } + pipeline->BindResources(memory, vertex_index_buffer, texture_cache); boost::container::static_vector color_attachments{}; - vk::RenderingAttachmentInfo depth_attachment{}; - u32 num_depth_attachments{}; for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { const auto& col_buf = regs.color_buffers[col_buf_id]; if (!col_buf) { @@ -55,17 +57,26 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { const auto& hint = liverpool->last_cb_extent[col_buf_id]; const auto& image_view = texture_cache.RenderTarget(col_buf, hint); + const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress()); color_attachments.push_back({ .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eGeneral, - .loadOp = vk::AttachmentLoadOp::eLoad, + .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = + is_clear ? 
LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{}, }); + texture_cache.TouchMeta(col_buf.CmaskAddress(), false); } + + vk::RenderingAttachmentInfo depth_attachment{}; + u32 num_depth_attachments{}; if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) { - const bool is_clear = regs.depth_render_control.depth_clear_enable; + const auto htile_address = regs.depth_htile_data_base.GetAddress(); + const bool is_clear = regs.depth_render_control.depth_clear_enable || + texture_cache.IsMetaCleared(htile_address); const auto& image_view = - texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent); + texture_cache.DepthTarget(regs.depth_buffer, htile_address, liverpool->last_db_extent); depth_attachment = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eGeneral, @@ -74,6 +85,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, .stencil = regs.stencil_clear}}, }; + texture_cache.TouchMeta(htile_address, false); num_depth_attachments++; } @@ -112,7 +124,14 @@ void Rasterizer::DispatchDirect() { const auto cmdbuf = scheduler.CommandBuffer(); const auto& cs_program = liverpool->regs.cs_program; const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); - pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + if (!pipeline) { + return; + } + + const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + if (!has_resources) { + return; + } cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index e223bbaf..750cc437 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -91,7 +91,7 @@ static vk::ImageUsageFlags ImageUsageFlags(const 
ImageInfo& info) { usage |= vk::ImageUsageFlagBits::eColorAttachment; } } - if (info.is_tiled || info.is_storage) { + if (info.is_tiled || info.usage.storage) { usage |= vk::ImageUsageFlagBits::eStorage; } return usage; @@ -149,10 +149,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, size.depth = 1; pitch = size.width; guest_size_bytes = buffer.GetSizeAligned(); + meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0; + meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0; usage.render_target = true; } -ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, +ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { is_tiled = false; pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); @@ -163,6 +165,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, size.depth = 1; pitch = size.width; guest_size_bytes = buffer.GetSizeAligned(); + meta_info.htile_addr = buffer.z_info.tile_surface_en ? 
htile_address : 0; usage.depth_target = true; } @@ -178,6 +181,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { resources.levels = image.NumLevels(); resources.layers = image.NumLayers(); guest_size_bytes = image.GetSizeAligned(); + usage.texture = true; } UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) @@ -248,6 +252,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, }, .mipLevels = static_cast(info.resources.levels), .arrayLayers = static_cast(info.resources.layers), + .samples = LiverpoolToVK::NumSamples(info.num_samples), .tiling = vk::ImageTiling::eOptimal, .usage = usage, .initialLayout = vk::ImageLayout::eUndefined, diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 9d9022c6..2067cde3 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -25,11 +25,12 @@ VK_DEFINE_HANDLE(VmaAllocator) namespace VideoCore { enum ImageFlagBits : u32 { - CpuModified = 1 << 2, ///< Contents have been modified from the CPU - GpuModified = 1 << 3, ///< Contents have been modified from the GPU - Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU - Registered = 1 << 6, ///< True when the image is registered - Picked = 1 << 7, ///< Temporary flag to mark the image as picked + CpuModified = 1 << 2, ///< Contents have been modified from the CPU + GpuModified = 1 << 3, ///< Contents have been modified from the GPU + Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU + Registered = 1 << 6, ///< True when the image is registered + Picked = 1 << 7, ///< Temporary flag to mark the image as picked + MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -38,7 +39,7 @@ struct ImageInfo { explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; explicit ImageInfo(const 
AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, + explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; explicit ImageInfo(const AmdGpu::Image& image) noexcept; @@ -49,16 +50,21 @@ struct ImageInfo { bool IsPacked() const; bool IsDepthStencil() const; + struct { + VAddr cmask_addr; + VAddr fmask_addr; + VAddr htile_addr; + } meta_info{}; + struct { u32 texture : 1; u32 storage : 1; u32 render_target : 1; u32 depth_target : 1; u32 vo_buffer : 1; - } usage; // Usage data tracked during image lifetime + } usage{}; // Usage data tracked during image lifetime bool is_tiled = false; - bool is_storage = false; vk::Format pixel_format = vk::Format::eUndefined; vk::ImageType type = vk::ImageType::e1D; SubresourceExtent resources; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 44112423..b5371f6b 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -131,6 +131,8 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool re image_id = image_ids[0]; } + RegisterMeta(info, image_id); + Image& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::CpuModified) && (!image_ids.empty() || refresh_on_create)) { @@ -150,7 +152,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi // impossible to use. However, during view creation, if an image isn't used as storage we can // temporary remove its storage bit. 
std::optional usage_override; - if (!image.info.is_storage) { + if (!image.info.usage.storage) { usage_override = image.usage & ~vk::ImageUsageFlagBits::eStorage; } @@ -161,12 +163,15 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi } ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) { - Image& image = FindImage(ImageInfo{desc}, desc.Address()); + const ImageInfo info{desc}; + Image& image = FindImage(info, desc.Address()); if (is_storage) { image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); + image.info.usage.storage = true; } else { image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead); + image.info.usage.texture = true; } const ImageViewInfo view_info{desc, is_storage}; @@ -183,13 +188,16 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eColorAttachmentRead); + image.info.usage.render_target = true; + ImageViewInfo view_info{buffer, !!image.info.usage.vo_buffer}; return RegisterImageView(image, view_info); } ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) { - const ImageInfo info{buffer, hint}; + const ImageInfo info{buffer, htile_address, hint}; auto& image = FindImage(info, buffer.Address(), false); image.flags &= ~ImageFlagBits::CpuModified; @@ -197,6 +205,8 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe vk::AccessFlagBits::eDepthStencilAttachmentWrite | vk::AccessFlagBits::eDepthStencilAttachmentRead); + image.info.usage.depth_target = true; + ImageViewInfo view_info; view_info.format = info.pixel_format; return RegisterImageView(image, view_info); @@ -276,6 +286,47 @@ void TextureCache::RegisterImage(ImageId image_id) { [this, image_id](u64 page) { page_table[page].push_back(image_id); }); } +void 
TextureCache::RegisterMeta(const ImageInfo& info, ImageId image_id) { + Image& image = slot_images[image_id]; + + if (image.flags & ImageFlagBits::MetaRegistered) { + return; + } + + bool registered = true; + // The current resource tracking implementation lets us detect usage of meta only at the last + // moment, so we will likely miss its first clear. To avoid this and make the first frame where + // the meta is encountered look correct, we set its state to "cleared" at registration time. + if (info.usage.render_target) { + if (info.meta_info.cmask_addr) { + surface_metas.emplace( + info.meta_info.cmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); + image.info.meta_info.cmask_addr = info.meta_info.cmask_addr; + } + + if (info.meta_info.fmask_addr) { + surface_metas.emplace( + info.meta_info.fmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); + image.info.meta_info.fmask_addr = info.meta_info.fmask_addr; + } + } else if (info.usage.depth_target) { + if (info.meta_info.htile_addr) { + surface_metas.emplace( + info.meta_info.htile_addr, + MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); + image.info.meta_info.htile_addr = info.meta_info.htile_addr; + } + } else { + registered = false; + } + + if (registered) { + image.flags |= ImageFlagBits::MetaRegistered; + } +} + void TextureCache::UnregisterImage(ImageId image_id) { Image& image = slot_images[image_id]; ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0ecd9faf..8778f3e8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -29,6 +29,17 @@ class TextureCache { static constexpr u64 PageBits = 20; static constexpr u64 PageMask = (1ULL << PageBits) - 1; + struct MetaDataInfo { + enum class Type { + CMask, + FMask, + HTile, + }; + + Type type; + bool 
is_cleared; + }; + public: explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TextureCache(); @@ -47,6 +58,7 @@ public: [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint); [[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint); /// Reuploads image contents. @@ -60,6 +72,27 @@ public: return slot_images[id]; } + bool IsMeta(VAddr address) const { + return surface_metas.contains(address); + } + + bool IsMetaCleared(VAddr address) const { + const auto& it = surface_metas.find(address); + if (it != surface_metas.end()) { + return it.value().is_cleared; + } + return false; + } + + bool TouchMeta(VAddr address, bool is_clear) { + auto it = surface_metas.find(address); + if (it != surface_metas.end()) { + it.value().is_cleared = is_clear; + return true; + } + return false; + } + private: ImageView& RegisterImageView(Image& image, const ImageViewInfo& view_info); @@ -123,6 +156,9 @@ private: /// Register image in the page table void RegisterImage(ImageId image); + /// Register metadata surfaces attached to the image + void RegisterMeta(const ImageInfo& info, ImageId image); + /// Unregister image from the page table void UnregisterImage(ImageId image); @@ -145,6 +181,7 @@ private: tsl::robin_map samplers; tsl::robin_pg_map> page_table; boost::icl::interval_map cached_pages; + tsl::robin_map surface_metas; std::mutex mutex; #ifdef _WIN64 void* veh_handle{};