From 08e155946e8ce6a5c9caafe29f00b8d4baf69ec8 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 22 May 2024 23:05:19 +0300 Subject: [PATCH] video_core: Remove hack in rasterizer * The hack was to skip the first draw as the display buffer had not been created yet and the texture cache couldn't create one itself. With this patch it now can, using the color buffer parameters from registers --- .../frontend/fetch_shader.cpp | 81 +++++++++++++++++++ src/shader_recompiler/frontend/fetch_shader.h | 22 +++++ .../frontend/structured_control_flow.cpp | 1 - src/shader_recompiler/ir/attribute.cpp | 4 + src/shader_recompiler/ir/passes/passes.h | 3 +- .../ir/passes/resource_tracking_pass.cpp | 5 +- src/shader_recompiler/ir/program.h | 3 + src/shader_recompiler/recompiler.cpp | 13 ++- src/shader_recompiler/recompiler.h | 1 + src/video_core/amdgpu/liverpool.cpp | 2 +- src/video_core/amdgpu/liverpool.h | 38 +-------- src/video_core/amdgpu/pixel_format.cpp | 2 +- src/video_core/amdgpu/pixel_format.h | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 21 ++++- .../renderer_vulkan/liverpool_to_vk.h | 3 + .../renderer_vulkan/renderer_vulkan.cpp | 2 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 11 +-- .../renderer_vulkan/vk_rasterizer.h | 3 + src/video_core/texture_cache/image.cpp | 15 ++++ src/video_core/texture_cache/image.h | 2 + .../texture_cache/texture_cache.cpp | 33 +++----- src/video_core/texture_cache/texture_cache.h | 2 +- 24 files changed, 193 insertions(+), 80 deletions(-) create mode 100644 src/shader_recompiler/frontend/fetch_shader.cpp create mode 100644 src/shader_recompiler/frontend/fetch_shader.h diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp new file mode 100644 index 00000000..1ae8c894 --- /dev/null +++ b/src/shader_recompiler/frontend/fetch_shader.cpp 
@@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/frontend/decode.h" +#include "shader_recompiler/frontend/fetch_shader.h" + +namespace Shader::Gcn { + +/** + * s_load_dwordx4 s[8:11], s[2:3], 0x00 + * s_load_dwordx4 s[12:15], s[2:3], 0x04 + * s_load_dwordx4 s[16:19], s[2:3], 0x08 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen + * buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen + * buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen + * s_waitcnt 0 + * s_setpc_b64 s[0:1] + + * s_load_dwordx4 s[4:7], s[2:3], 0x0 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen + * s_load_dwordx4 s[4:7], s[2:3], 0x8 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen + * s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0) + * s_setpc_b64 s[0:1] + + * A normal fetch shader looks like the above, the instructions are generated + * using input semantics on cpu side. Load instructions can either be separate or interleaved. + * We take the reverse way, extract the original input semantics from these instructions. + **/ + +std::vector ParseFetchShader(std::span code) { + std::vector attributes; + GcnCodeSlice code_slice(code.data(), code.data() + std::numeric_limits::max()); + GcnDecodeContext decoder; + + struct VsharpLoad { + u32 dword_offset{}; + s32 base_sgpr{}; + s32 dst_sgpr{-1}; + }; + boost::container::static_vector loads; + + u32 semantic_index = 0; + while (!code_slice.atEnd()) { + const auto inst = decoder.decodeInstruction(code_slice); + if (inst.opcode == Opcode::S_SETPC_B64) { + break; + } + + if (inst.inst_class == InstClass::ScalarMemRd) { + loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code); + continue; + } + + if (inst.inst_class == InstClass::VectorMemBufFmt) { + // Find the load instruction that loaded the V# to the SGPR. 
+ // This is so we can determine its index in the vertex table. + const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) { + return load.dst_sgpr == inst.src[2].code * 4; + }); + + auto& attrib = attributes.emplace_back(); + attrib.semantic = semantic_index++; + attrib.dest_vgpr = inst.src[1].code; + attrib.num_elements = inst.control.mubuf.count; + attrib.sgpr_base = it->base_sgpr; + attrib.dword_offset = it->dword_offset; + + // Mark load as used. + it->dst_sgpr = -1; + } + } + + return attributes; +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h new file mode 100644 index 00000000..636cd5e7 --- /dev/null +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include "common/types.h" + +namespace Shader::Gcn { + +struct VertexAttribute { + u8 semantic; ///< Semantic index of the attribute + u8 dest_vgpr; ///< Destination VGPR to load first component + u8 num_elements; ///< Number of components to load + u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# + u8 dword_offset; ///< The dword offset of the V# that describes this attribute. 
+}; + +std::vector ParseFetchShader(std::span code); + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index f593529d..3464a88e 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -634,7 +634,6 @@ private: const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stage, inst_list.subspan(start, size)); - fmt::print("{}\n", IR::DumpBlock(*current_block)); break; } case StatementType::SetVariable: { diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 714053bc..3b60bf65 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) { return "Param31"; case Attribute::VertexId: return "VertexId"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::FragCoord: + return "FragCoord"; default: break; } diff --git a/src/shader_recompiler/ir/passes/passes.h b/src/shader_recompiler/ir/passes/passes.h index 49bb09b1..e4baae92 100644 --- a/src/shader_recompiler/ir/passes/passes.h +++ b/src/shader_recompiler/ir/passes/passes.h @@ -4,6 +4,7 @@ #pragma once #include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/program.h" namespace Shader::Optimization { @@ -11,6 +12,6 @@ void SsaRewritePass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::BlockList& program); void ConstantPropagationPass(IR::BlockList& program); -void ResourceTrackingPass(IR::BlockList& program); +void ResourceTrackingPass(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 
feb213df..39f0b808 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) { }; } -void ResourceTrackingPass(IR::BlockList& program) { - for (IR::Block* const block : program) { +void ResourceTrackingPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsResourceInstruction(inst)) { continue; } - printf("ff\n"); IR::Inst* producer = inst.Arg(0).InstRecursive(); const auto loc = TrackSharp(producer->Arg(0)); fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords, diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index f4f5197f..2efb6f50 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -15,11 +15,14 @@ enum class Stage : u32; namespace Shader::IR { +static constexpr size_t NumUserDataRegs = 16; + struct Program { AbstractSyntaxList syntax_list; BlockList blocks; BlockList post_order_blocks; std::vector ins_list; + std::array user_data; Stage stage; }; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5bc521bd..3215ed6d 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -32,6 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { std::vector TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Stage stage, + std::span ud_regs, std::span token) { // Ensure first instruction is expected. 
constexpr u32 token_mov_vcchi = 0xBEEB03FF; @@ -40,6 +41,11 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); Gcn::GcnDecodeContext decoder; + static int counter = 0; + std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary); + file.write((const char*)token.data(), token.size_bytes()); + file.close(); + // Decode and save instructions IR::Program program; program.ins_list.reserve(token.size()); @@ -56,14 +62,19 @@ std::vector TranslateProgram(ObjectPool& inst_pool, program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); program.stage = stage; + std::ranges::copy(ud_regs, program.user_data.begin()); // Run optimization passes Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::IdentityRemovalPass(program.blocks); - // Shader::Optimization::ResourceTrackingPass(program.post_order_blocks); + Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::DeadCodeEliminationPass(program.blocks); + for (const auto& block : program.blocks) { + fmt::print("{}\n", IR::DumpBlock(*block)); + } + // TODO: Pass profile from vulkan backend const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program); return code; diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 8cd9c7ea..c746c3d8 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -28,6 +28,7 @@ struct BinaryInfo { [[nodiscard]] std::vector TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Stage stage, + std::span ud_regs, std::span code); } // namespace Shader diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 09c1cb66..b1563a34 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ 
b/src/video_core/amdgpu/liverpool.cpp @@ -122,7 +122,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; - // rasterizer->DrawIndex(); + rasterizer->DrawIndex(); break; } case PM4ItOpcode::DispatchDirect: { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index f0a27bb1..c93d019b 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" +#include "video_core/amdgpu/pixel_format.h" #include #include @@ -423,39 +424,6 @@ struct Liverpool { Swap8In64 = 3, }; - enum class Format : u32 { - Invalid = 0, - Color_8 = 1, - Color_16 = 2, - Color_8_8 = 3, - Color_32 = 4, - Color_16_16 = 5, - Color_10_11_11 = 6, - Color_11_11_10 = 7, - Color_10_10_10_2 = 8, - Color_2_10_10_10 = 9, - Color_8_8_8_8 = 10, - Color_32_32 = 11, - Color_16_16_16_16 = 12, - Color_32_32_32_32 = 14, - Color_5_6_5 = 16, - Color_1_5_5_5 = 17, - Color_5_5_5_1 = 18, - Color_4_4_4_4 = 19, - Color_8_24 = 20, - Color_24_8 = 21, - Color_X24_8_32_FL = 22, - }; - - enum class NumberType : u32 { - Unorm = 0, - Snorm = 1, - Uint = 4, - Sint = 5, - Srgb = 6, - Float = 7, - }; - enum class SwapMode : u32 { Standard = 0, Alternate = 1, @@ -482,9 +450,9 @@ struct Liverpool { } view; union { BitField<0, 2, EndianSwap> endian; - BitField<2, 5, Format> format; + BitField<2, 5, DataFormat> format; BitField<7, 1, u32> linear_general; - BitField<8, 2, NumberType> number_type; + BitField<8, 2, NumberFormat> number_type; BitField<11, 2, SwapMode> comp_swap; BitField<13, 1, u32> fast_clear; BitField<14, 1, u32> compression; diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index 775fb1f1..5bb8f0fb 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ 
b/src/video_core/amdgpu/pixel_format.cpp @@ -6,7 +6,7 @@ namespace AmdGpu { -u32 getNumComponents(DataFormat format) { +u32 NumComponents(DataFormat format) { constexpr std::array numComponentsPerElement = { 0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4, diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 488b00fc..f28e4235 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -59,6 +59,6 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; -u32 getNumComponents(DataFormat format); +u32 NumComponents(DataFormat format); } // namespace AmdGpu diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 8f9a76a2..00b28de9 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include "common/assert.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" @@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleListWithAdjacency; case Liverpool::PrimitiveType::AdjTriangleStrip: return vk::PrimitiveTopology::eTriangleStripWithAdjacency; + case Liverpool::PrimitiveType::QuadList: + // Needs to generate index buffer on the fly. 
+ return vk::PrimitiveTopology::eTriangleList; default: UNREACHABLE(); return vk::PrimitiveTopology::eTriangleList; @@ -110,4 +113,20 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) { } } +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR32G32B32A32Sfloat; + } + if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR32G32B32Uint; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR8G8B8A8Unorm; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { + return vk::Format::eR8G8B8A8Srgb; + } + UNREACHABLE(); +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 97994bf8..c04b1cb9 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -4,6 +4,7 @@ #pragma once #include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/pixel_format.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan::LiverpoolToVK { @@ -20,4 +21,6 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); vk::CullModeFlags CullMode(Liverpool::CullMode mode); +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index e952263e..72ee6c9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { if 
(!frame) { if (!splash_img.has_value()) { - VideoCore::ImageInfo info{}; info.pixel_format = vk::Format::eR8G8B8A8Srgb; info.type = vk::ImageType::e2D; @@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr } Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { - // Request a free presentation frame. Frame* frame = GetRenderFrame(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6cbd26b9..4ba2b61b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -126,7 +126,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pName = "main", }; - const vk::Format color_format = vk::Format::eB8G8R8A8Srgb; + const vk::Format color_format = vk::Format::eR8G8B8A8Srgb; const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { .colorAttachmentCount = 1, .pColorAttachmentFormats = &color_format, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 28fb51d0..23281c78 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -50,7 +50,7 @@ void PipelineCache::BindPipeline() { // Compile and cache shader. 
const auto data = std::span{token, bininfo.length / sizeof(u32)}; - const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data); + const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, pgm.user_data, data); return CompileSPV(program, instance.GetDevice()); }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5f5d3d4e..595dcff1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -31,20 +31,11 @@ void Rasterizer::DrawIndex() { const auto cmdbuf = scheduler.CommandBuffer(); auto& regs = liverpool->regs; - static bool first_time = true; - if (first_time) { - first_time = false; - return; - } - UpdateDynamicState(); pipeline_cache.BindPipeline(); - const u32 pitch = regs.color_buffers[0].Pitch(); - const u32 height = regs.color_buffers[0].Height(); - const u32 tile_max = regs.color_buffers[0].slice.tile_max; - auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch); + auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); const vk::RenderingAttachmentInfo color_info = { .imageView = *image_view.image_view, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ba3c2d3a..a1e940ba 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -29,6 +29,9 @@ public: /// Performs a draw call with an index buffer. void DrawIndex(); + /// Performs a draw call without an index buffer. + void DrawAuto(); + /// Updates graphics state that is not part of the bound pipeline. 
void UpdateDynamicState(); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index b78d2563..e9ac4ff0 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/config.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/image.h" @@ -65,6 +66,20 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } } +ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { + // There is a small difference between T# and CB number types, account for it. + const auto number_fmt = + buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb + : buffer.info.number_type; + is_tiled = true; + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt); + type = vk::ImageType::e2D; + size.width = buffer.Pitch(); + size.height = buffer.Height(); + pitch = size.width; + guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1); +} + UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) : device{device_}, allocator{allocator_} {} diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c1bddec7..92391fde 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -6,6 +6,7 @@ #include "common/enum.h" #include "common/types.h" #include "core/libraries/videoout/buffer.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/types.h" @@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) struct ImageInfo { ImageInfo() = default; explicit ImageInfo(const 
Libraries::VideoOut::BufferAttributeGroup& group) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept; bool is_tiled = false; vk::Format pixel_format = vk::Format::eUndefined; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 15679ba9..17cc3ec2 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -101,8 +101,8 @@ TextureCache::~TextureCache() { } void TextureCache::OnCpuWrite(VAddr address) { - const VAddr address_aligned = address & ~((1 << PageBits) - 1); - ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) { + const VAddr address_aligned = address & ~((1 << PageShift) - 1); + ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) { // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. @@ -137,26 +137,19 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { return image; } -ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) { - boost::container::small_vector image_ids; - ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) { - if (image.cpu_addr == cpu_address) { - image_ids.push_back(image_id); - } - }); +ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) { + const ImageInfo info{buffer}; + auto& image = FindImage(info, buffer.Address()); - ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!"); - auto* image = &slot_images[image_ids.empty() ? 
ImageId{0} : image_ids.back()]; - - ImageViewInfo info; - info.format = vk::Format::eB8G8R8A8Srgb; - if (const ImageViewId view_id = image->FindView(info); view_id) { + ImageViewInfo view_info; + view_info.format = info.pixel_format; + if (const ImageViewId view_id = image.FindView(view_info); view_id) { return slot_image_views[view_id]; } - const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image); - image->image_view_infos.emplace_back(info); - image->image_view_ids.emplace_back(view_id); + const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image); + image.image_view_infos.emplace_back(view_info); + image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; } @@ -225,13 +218,13 @@ void TextureCache::UnregisterImage(ImageId image_id) { ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { - ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift); return; } auto& image_ids = page_it.value(); const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { - ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift); return; } image_ids.erase(vector_it); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a11201c4..f59f16c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -37,7 +37,7 @@ public: Image& FindImage(const ImageInfo& info, VAddr cpu_address); /// Retrieves the render target with specified properties - ImageView& RenderTarget(VAddr cpu_address, u32 pitch); + ImageView& 
RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer); /// Reuploads image contents. void RefreshImage(Image& image);