diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bd38f64..4fc91042 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -453,6 +453,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_swapchain.h src/video_core/texture_cache/image.cpp src/video_core/texture_cache/image.h + src/video_core/texture_cache/image_info.cpp + src/video_core/texture_cache/image_info.h src/video_core/texture_cache/image_view.cpp src/video_core/texture_cache/image_view.h src/video_core/texture_cache/sampler.cpp diff --git a/src/core/libraries/videoout/buffer.h b/src/core/libraries/videoout/buffer.h index 88dad852..8f49be59 100644 --- a/src/core/libraries/videoout/buffer.h +++ b/src/core/libraries/videoout/buffer.h @@ -62,7 +62,6 @@ struct BufferAttribute { struct BufferAttributeGroup { bool is_occupied; BufferAttribute attrib; - u32 size_in_bytes; }; struct VideoOutBuffer { diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index ece4ea01..e74fb10f 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -122,8 +122,6 @@ int VideoOutDriver::RegisterBuffers(VideoOutPort* port, s32 startIndex, void* co auto& group = port->groups[group_index]; std::memcpy(&group.attrib, attribute, sizeof(BufferAttribute)); - group.size_in_bytes = - attribute->height * attribute->pitch_in_pixel * PixelFormatBpp(attribute->pixel_format); group.is_occupied = true; for (u32 i = 0; i < bufferNum; i++) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index ce95b370..911983a4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -100,11 +100,11 @@ Id EmitFPNeg64(EmitContext& ctx, Id value) { } Id EmitFPSin(EmitContext& ctx, Id value) { - return ctx.OpSin(ctx.F32[1], value); + return 
ctx.OpSin(ctx.F32[1], ctx.OpFMul(ctx.F32[1], ctx.pi_x2, value)); } Id EmitFPCos(EmitContext& ctx, Id value) { - return ctx.OpCos(ctx.F32[1], value); + return ctx.OpCos(ctx.F32[1], ctx.OpFMul(ctx.F32[1], ctx.pi_x2, value)); } Id EmitFPExp2(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 3ea01a1d..16c10f53 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1,11 +1,14 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include -#include #include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include +#include + +#include + namespace Shader::Backend::SPIRV { namespace { @@ -100,6 +103,8 @@ void EmitContext::DefineArithmeticTypes() { u32_zero_value = ConstU32(0U); f32_zero_value = ConstF32(0.0f); + pi_x2 = ConstF32(2.0f * float{std::numbers::pi}); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32"); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0f8081fd..fc678344 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -143,6 +143,8 @@ public: Id full_result_i32x2; Id full_result_u32x2; + Id pi_x2; + Id true_value{}; Id false_value{}; Id u32_one_value{}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 84539c28..bffec92b 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -321,7 +321,7 @@ struct Liverpool { struct DepthBuffer { enum class 
ZFormat : u32 { - Invald = 0, + Invalid = 0, Z16 = 1, Z32Float = 3, }; @@ -367,8 +367,14 @@ struct Liverpool { return u64(z_read_base) << 8; } - size_t GetSizeAligned() const { - return depth_slice.tile_max * 8; + u32 NumSamples() const { + return 1u << z_info.num_samples; // spec doesn't say it is a log2 + } + + size_t GetDepthSliceSize() const { + ASSERT(z_info.format != ZFormat::Invalid); + const auto bpe = z_info.format == ZFormat::Z32Float ? 4 : 2; + return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples(); } }; @@ -733,12 +739,19 @@ struct Liverpool { return VAddr(fmask_base_address) << 8; } - size_t GetSizeAligned() const { + u32 NumSamples() const { + return 1 << attrib.num_fragments_log2; + } + + u32 NumSlices() const { + return view.slice_max + 1; + } + + size_t GetColorSliceSize() const { const auto num_bytes_per_element = NumBits(info.format) / 8u; - const auto slice_size = (slice.tile_max + 1) * 64u; - const auto total_size = slice_size * (view.slice_max + 1) * num_bytes_per_element; - ASSERT(total_size > 0); - return total_size; + const auto slice_size = + num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples(); + return slice_size; } TilingMode GetTilingMode() const { @@ -819,6 +832,17 @@ struct Liverpool { BitField<6, 1, u32> depth_compress_disable; }; + union DepthView { + BitField<0, 11, u32> slice_start; + BitField<13, 11, u32> slice_max; + BitField<24, 1, u32> z_read_only; + BitField<25, 1, u32> stencil_read_only; + + u32 NumSlices() const { + return slice_max + 1u; + } + }; + union AaConfig { BitField<0, 3, u32> msaa_num_samples; BitField<4, 1, u32> aa_mask_centroid_dtmn; @@ -849,7 +873,9 @@ struct Liverpool { ComputeProgram cs_program; INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; - INSERT_PADDING_WORDS(4); + INSERT_PADDING_WORDS(1); + DepthView depth_view; + INSERT_PADDING_WORDS(2); Address depth_htile_data_base; INSERT_PADDING_WORDS(2); float depth_bounds_min; @@ -1050,6 +1076,7 @@ 
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C); static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40); static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000); +static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002); static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010); diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 1247c025..6ab3306b 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -36,6 +36,12 @@ struct Buffer { u32 element_size : 2; u32 index_stride : 2; u32 add_tid_enable : 1; + u32 : 6; + u32 type : 2; // overlaps with T# type, so should be 0 for buffer + + bool Valid() const { + return type == 0u; + } operator bool() const noexcept { return base_address != 0; @@ -106,22 +112,25 @@ constexpr std::string_view NameOf(ImageType type) { } enum class TilingMode : u32 { - Depth_MicroTiled = 0x5u, + Depth_MacroTiled = 0u, Display_Linear = 0x8u, Display_MacroTiled = 0xAu, Texture_MicroTiled = 0xDu, + Texture_MacroTiled = 0xEu, }; constexpr std::string_view NameOf(TilingMode type) { switch (type) { - case TilingMode::Depth_MicroTiled: - return "Depth_MicroTiled"; + case TilingMode::Depth_MacroTiled: + return "Depth_MacroTiled"; case TilingMode::Display_Linear: return "Display_Linear"; case TilingMode::Display_MacroTiled: return "Display_MacroTiled"; case TilingMode::Texture_MicroTiled: return "Texture_MicroTiled"; + case TilingMode::Texture_MacroTiled: + return "Texture_MacroTiled"; default: return "Unknown"; } @@ -149,7 +158,7 @@ struct Image { u64 pow2pad : 1; u64 mtype2 : 1; u64 atc : 1; - u64 type : 4; + u64 type : 4; // overlaps with V# type, so shouldn't be 0 for buffer u64 depth : 13; u64 pitch : 14; @@ -162,6 +171,10 @@ struct Image { u64 
lod_hw_cnt_en : 1; u64 : 43; + bool Valid() const { + return (type & 0x8u) != 0; + } + VAddr Address() const { return base_address << 8; } @@ -201,17 +214,19 @@ struct Image { } TilingMode GetTilingMode() const { + if (tiling_index >= 0 && tiling_index <= 7) { + return tiling_index == 5 ? TilingMode::Texture_MicroTiled + : TilingMode::Depth_MacroTiled; + } + if (tiling_index == 0x13) { + return TilingMode::Texture_MicroTiled; + } return static_cast(tiling_index); } bool IsTiled() const { return GetTilingMode() != TilingMode::Display_Linear; } - - size_t GetSizeAligned() const { - // TODO: Derive this properly from tiling params - return Pitch() * (height + 1) * NumComponents(GetDataFmt()); - } }; static_assert(sizeof(Image) == 32); // 256bits diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 954b6870..651b863f 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -505,7 +505,7 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format == DepthBuffer::StencilFormat::Stencil8) { return vk::Format::eD16UnormS8Uint; } - if (z_format == DepthBuffer::ZFormat::Invald && + if (z_format == DepthBuffer::ZFormat::Invalid && stencil_format == DepthBuffer::StencilFormat::Invalid) { return vk::Format::eUndefined; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6d498c75..098f14d9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -182,8 +182,9 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { info.size = VideoCore::Extent3D{splash->GetImageInfo().width, splash->GetImageInfo().height, 1}; info.pitch = splash->GetImageInfo().width; + info.guest_address = VAddr(splash->GetImageData().data()); info.guest_size_bytes = 
splash->GetImageData().size(); - splash_img.emplace(instance, scheduler, info, VAddr(splash->GetImageData().data())); + splash_img.emplace(instance, scheduler, info); texture_cache.RefreshImage(*splash_img); } frame = PrepareFrameInternal(*splash_img); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index bf5d220b..701d3d14 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -40,7 +40,7 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { - const auto info = VideoCore::ImageInfo{attribute}; + const auto info = VideoCore::ImageInfo{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(info, cpu_address); auto& image = texture_cache.GetImage(image_id); return PrepareFrameInternal(image); @@ -54,7 +54,7 @@ public: VideoCore::Image& RegisterVideoOutSurface( const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); - const auto info = VideoCore::ImageInfo{attribute}; + const auto info = VideoCore::ImageInfo{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(info, cpu_address); return texture_cache.GetImage(image_id); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index f70e49a7..954adf44 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -128,7 +128,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s for (const auto& image_desc : info.images) { const auto tsharp = info.ReadUd(image_desc.sgpr_base, image_desc.dword_offset); - const auto& image_view = texture_cache.FindImageView(tsharp, image_desc.is_storage); + const auto& image_view = texture_cache.FindTexture(tsharp, 
image_desc.is_storage); const auto& image = texture_cache.GetImage(image_view.image_id); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a3ba2f77..f119bc77 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -366,7 +366,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& for (const auto& image_desc : stage.images) { const auto& tsharp = tsharps.emplace_back( stage.ReadUd(image_desc.sgpr_base, image_desc.dword_offset)); - const auto& image_view = texture_cache.FindImageView(tsharp, image_desc.is_storage); + const auto& image_view = texture_cache.FindTexture(tsharp, image_desc.is_storage); const auto& image = texture_cache.GetImage(image_view.image_id); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fff9bc33..fe52d074 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -113,7 +113,7 @@ void Rasterizer::BeginRendering() { } const auto& hint = liverpool->last_cb_extent[col_buf_id]; - const auto& image_view = texture_cache.RenderTarget(col_buf, hint); + const auto& image_view = texture_cache.FindRenderTarget(col_buf, hint); const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); @@ -130,14 +130,15 @@ void Rasterizer::BeginRendering() { texture_cache.TouchMeta(col_buf.CmaskAddress(), false); } - if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invald && + if 
(regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invalid && regs.depth_buffer.Address() != 0) { const auto htile_address = regs.depth_htile_data_base.GetAddress(); const bool is_clear = regs.depth_render_control.depth_clear_enable || texture_cache.IsMetaCleared(htile_address); const auto& hint = liverpool->last_db_extent; - const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint, - regs.depth_control.depth_write_enable); + const auto& image_view = texture_cache.FindDepthTarget( + regs.depth_buffer, regs.depth_view.NumSlices(), htile_address, hint, + regs.depth_control.depth_write_enable); const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f06492ef..2b444774 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" -#include "common/config.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -14,25 +13,8 @@ namespace VideoCore { using namespace Vulkan; -using VideoOutFormat = Libraries::VideoOut::PixelFormat; using Libraries::VideoOut::TilingMode; -static vk::Format ConvertPixelFormat(const VideoOutFormat format) { - switch (format) { - case VideoOutFormat::A8R8G8B8Srgb: - return vk::Format::eB8G8R8A8Srgb; - case VideoOutFormat::A8B8G8R8Srgb: - return vk::Format::eR8G8B8A8Srgb; - case VideoOutFormat::A2R10G10B10: - case VideoOutFormat::A2R10G10B10Srgb: - return vk::Format::eA2R10G10B10UnormPack32; - default: - break; - } - UNREACHABLE_MSG("Unknown format={}", static_cast(format)); - return {}; -} - bool ImageInfo::IsBlockCoded() const { switch 
(pixel_format) { case vk::Format::eBc1RgbaSrgbBlock: @@ -101,93 +83,6 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) { return usage; } -static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { - switch (type) { - case AmdGpu::ImageType::Color1D: - case AmdGpu::ImageType::Color1DArray: - return vk::ImageType::e1D; - case AmdGpu::ImageType::Color2D: - case AmdGpu::ImageType::Cube: - case AmdGpu::ImageType::Color2DArray: - return vk::ImageType::e2D; - case AmdGpu::ImageType::Color3D: - return vk::ImageType::e3D; - default: - UNREACHABLE(); - } -} - -ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept { - const auto& attrib = group.attrib; - is_tiled = attrib.tiling_mode == TilingMode::Tile; - tiling_mode = - is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear; - pixel_format = ConvertPixelFormat(attrib.pixel_format); - type = vk::ImageType::e2D; - size.width = attrib.width; - size.height = attrib.height; - pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127); - const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float; - ASSERT(is_32bpp); - if (!is_tiled) { - guest_size_bytes = pitch * size.height * 4; - return; - } - if (Config::isNeoMode()) { - guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4; - } else { - guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4; - } - usage.vo_buffer = true; -} - -ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { - is_tiled = buffer.IsTiled(); - tiling_mode = buffer.GetTilingMode(); - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); - num_samples = 1 << buffer.attrib.num_fragments_log2; - type = vk::ImageType::e2D; - size.width = hint.Valid() ? hint.width : buffer.Pitch(); - size.height = hint.Valid() ? 
hint.height : buffer.Height(); - size.depth = 1; - pitch = size.width; - guest_size_bytes = buffer.GetSizeAligned(); - meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0; - meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0; - usage.render_target = true; -} - -ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { - is_tiled = false; - pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); - type = vk::ImageType::e2D; - num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2 - size.width = hint.Valid() ? hint.width : buffer.Pitch(); - size.height = hint.Valid() ? hint.height : buffer.Height(); - size.depth = 1; - pitch = size.width; - guest_size_bytes = buffer.GetSizeAligned(); - meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; - usage.depth_target = true; -} - -ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { - is_tiled = image.IsTiled(); - tiling_mode = image.GetTilingMode(); - pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); - type = ConvertImageType(image.GetType()); - size.width = image.width + 1; - size.height = image.height + 1; - size.depth = 1; - pitch = image.Pitch(); - resources.levels = image.NumLevels(); - resources.layers = image.NumLayers(); - guest_size_bytes = image.GetSizeAligned(); - usage.texture = true; -} - UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) : device{device_}, allocator{allocator_} {} @@ -217,9 +112,9 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) { } Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - const ImageInfo& info_, VAddr cpu_addr) + const ImageInfo& info_) : instance{&instance_}, scheduler{&scheduler_}, info{info_}, - image{instance->GetDevice(), instance->GetAllocator()}, 
cpu_addr{cpu_addr}, + image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { ASSERT(info.pixel_format != vk::Format::eUndefined); vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index e91f1b58..97ceaa09 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -9,6 +9,7 @@ #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/types.h" @@ -34,47 +35,6 @@ enum ImageFlagBits : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) -struct ImageInfo { - ImageInfo() = default; - explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; - explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - explicit ImageInfo(const AmdGpu::Image& image) noexcept; - - bool IsTiled() const { - return tiling_mode != AmdGpu::TilingMode::Display_Linear; - } - bool IsBlockCoded() const; - bool IsPacked() const; - bool IsDepthStencil() const; - - struct { - VAddr cmask_addr; - VAddr fmask_addr; - VAddr htile_addr; - } meta_info{}; - - struct { - u32 texture : 1; - u32 storage : 1; - u32 render_target : 1; - u32 depth_target : 1; - u32 vo_buffer : 1; - } usage{}; // Usage data tracked during image lifetime - - bool is_tiled = false; - vk::Format pixel_format = vk::Format::eUndefined; - vk::ImageType type = vk::ImageType::e1D; - SubresourceExtent resources; - Extent3D size{1, 1, 1}; - u32 num_samples = 1; - u32 pitch = 0; - 
u32 guest_size_bytes = 0; - AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear}; -}; - struct UniqueImage { explicit UniqueImage(vk::Device device, VmaAllocator allocator); ~UniqueImage(); @@ -109,8 +69,7 @@ private: constexpr Common::SlotId NULL_IMAGE_ID{0}; struct Image { - explicit Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - const ImageInfo& info, VAddr cpu_addr); + Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, const ImageInfo& info); ~Image(); Image(const Image&) = delete; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 00000000..41ad0938 --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp @@ -0,0 +1,268 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/assert.h" +#include "common/config.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" +#include "video_core/texture_cache/image_info.h" + +namespace VideoCore { + +using namespace Vulkan; +using Libraries::VideoOut::TilingMode; +using VideoOutFormat = Libraries::VideoOut::PixelFormat; + +static vk::Format ConvertPixelFormat(const VideoOutFormat format) { + switch (format) { + case VideoOutFormat::A8R8G8B8Srgb: + return vk::Format::eB8G8R8A8Srgb; + case VideoOutFormat::A8B8G8R8Srgb: + return vk::Format::eR8G8B8A8Srgb; + case VideoOutFormat::A2R10G10B10: + case VideoOutFormat::A2R10G10B10Srgb: + return vk::Format::eA2R10G10B10UnormPack32; + default: + break; + } + UNREACHABLE_MSG("Unknown format={}", static_cast(format)); + return {}; +} + +static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { + switch (type) { + case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color1DArray: + return vk::ImageType::e1D; + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Cube: + case AmdGpu::ImageType::Color2DArray: + return 
vk::ImageType::e2D; + case AmdGpu::ImageType::Color3D: + return vk::ImageType::e3D; + default: + UNREACHABLE(); + } +} + +// clang-format off +// The table of macro tiles parameters for given tiling index (row) and bpp (column) +static constexpr std::array macro_tile_extents{ + std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 07 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0B + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0C + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 10 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 11 + 
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 12 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A +}; +// clang-format on + +static constexpr std::pair micro_tile_extent{8u, 8u}; +static constexpr auto hw_pipe_interleave = 256u; + +static constexpr std::pair GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) { + ASSERT(num_samples == 1); + const auto row = tiling_idx * 4; + const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64 + return macro_tile_extents[row + column]; +} + +static constexpr size_t ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, u32 num_samples) { + const auto pitch_align = std::max(8u, 64u / ((bpp + 7) / 8)); + auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); + const auto height_aligned = height; + size_t log_sz = 1; + const auto slice_align = std::max(64u, hw_pipe_interleave / ((bpp + 7) / 8)); + while (log_sz % slice_align) { + log_sz = pitch_aligned * height_aligned * num_samples; + pitch_aligned += pitch_align; + } + return (log_sz * bpp + 7) / 8; +} + +static constexpr size_t ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples) { + const auto& [pitch_align, height_align] = micro_tile_extent; + auto pitch_aligned = (pitch + pitch_align - 1) &
~(pitch_align - 1); + const auto height_aligned = (height + height_align - 1) & ~(height_align - 1); + size_t log_sz = 1; + while (log_sz % 256) { + log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; + pitch_aligned += 8; + } + return log_sz; +} + +static constexpr size_t ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples, + u32 tiling_idx) { + const auto& [pitch_align, height_align] = GetMacroTileExtents(tiling_idx, bpp, num_samples); + ASSERT(pitch_align != 0 && height_align != 0); + const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); + const auto height_aligned = (height + height_align - 1) & ~(height_align - 1); + return (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; +} + +ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, + VAddr cpu_address) noexcept { + const auto& attrib = group.attrib; + is_tiled = attrib.tiling_mode == TilingMode::Tile; + tiling_mode = + is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear; + pixel_format = ConvertPixelFormat(attrib.pixel_format); + type = vk::ImageType::e2D; + size.width = attrib.width; + size.height = attrib.height; + pitch = attrib.tiling_mode == TilingMode::Linear ? 
size.width : (size.width + 127) & (~127); + usage.vo_buffer = true; + const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float; + ASSERT(is_32bpp); + + guest_address = cpu_address; + if (!is_tiled) { + guest_size_bytes = pitch * size.height * 4; + } else { + if (Config::isNeoMode()) { + guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4; + } else { + guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4; + } + } + mips_layout.emplace_back(0, guest_size_bytes); +} + +ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { + is_tiled = buffer.IsTiled(); + tiling_mode = buffer.GetTilingMode(); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); + num_samples = 1 << buffer.attrib.num_fragments_log2; + type = vk::ImageType::e2D; + size.width = hint.Valid() ? hint.width : buffer.Pitch(); + size.height = hint.Valid() ? hint.height : buffer.Height(); + size.depth = 1; + pitch = buffer.Pitch(); + resources.layers = buffer.NumSlices(); + meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0; + meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0; + usage.render_target = true; + + guest_address = buffer.Address(); + const auto color_slice_sz = buffer.GetColorSliceSize(); + guest_size_bytes = color_slice_sz * buffer.NumSlices(); + mips_layout.emplace_back(0, color_slice_sz); +} + +ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, + VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { + is_tiled = false; + pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); + type = vk::ImageType::e2D; + num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2 + size.width = hint.Valid() ? hint.width : buffer.Pitch(); + size.height = hint.Valid() ? 
hint.height : buffer.Height(); + size.depth = 1; + pitch = size.width; + resources.layers = num_slices; + meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; + usage.depth_target = true; + + guest_address = buffer.Address(); + const auto depth_slice_sz = buffer.GetDepthSliceSize(); + guest_size_bytes = depth_slice_sz * num_slices; + mips_layout.emplace_back(0, depth_slice_sz); +} + +ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { + is_tiled = image.IsTiled(); + tiling_mode = image.GetTilingMode(); + pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); + type = ConvertImageType(image.GetType()); + is_cube = image.GetType() == AmdGpu::ImageType::Cube; + is_volume = image.GetType() == AmdGpu::ImageType::Color3D; + size.width = image.width + 1; + size.height = image.height + 1; + size.depth = is_volume ? image.depth + 1 : 1; + pitch = image.Pitch(); + resources.levels = image.NumLevels(); + resources.layers = image.NumLayers(); + usage.texture = true; + + guest_address = image.Address(); + + mips_layout.reserve(resources.levels); + const auto num_bits = NumBits(image.GetDataFmt()); + const auto is_block = IsBlockCoded(); + const auto is_pow2 = image.pow2pad; + + guest_size_bytes = 0; + for (auto mip = 0u; mip < resources.levels; ++mip) { + auto bpp = num_bits; + auto mip_w = pitch >> mip; + auto mip_h = size.height >> mip; + if (is_block) { + mip_w = (mip_w + 3) / 4; + mip_h = (mip_h + 3) / 4; + bpp *= 16; + } + mip_w = std::max(mip_w, 1u); + mip_h = std::max(mip_h, 1u); + auto mip_d = std::max(size.depth >> mip, 1u); + + if (is_pow2) { + mip_w = std::bit_ceil(mip_w); + mip_h = std::bit_ceil(mip_h); + mip_d = std::bit_ceil(mip_d); + } + + size_t mip_size = 0; + switch (tiling_mode) { + case AmdGpu::TilingMode::Display_Linear: { + ASSERT(!is_cube); + mip_size = ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples); + break; + } + case AmdGpu::TilingMode::Texture_MicroTiled: { + mip_size = 
ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples); + break; + } + case AmdGpu::TilingMode::Display_MacroTiled: + case AmdGpu::TilingMode::Texture_MacroTiled: + case AmdGpu::TilingMode::Depth_MacroTiled: { + ASSERT(!is_cube && !is_block); + ASSERT(num_samples == 1); + ASSERT(num_bits <= 64); + mip_size = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); + break; + } + default: { + UNREACHABLE(); + } + } + mip_size *= mip_d; + + mips_layout.emplace_back(guest_size_bytes, mip_size); + guest_size_bytes += mip_size; + } + guest_size_bytes *= resources.layers; +} + +} // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 00000000..b98410b9 --- /dev/null +++ b/src/video_core/texture_cache/image_info.h @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/enum.h" +#include "common/types.h" +#include "core/libraries/videoout/buffer.h" +#include "video_core/amdgpu/liverpool.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCore { + +struct ImageInfo { + ImageInfo() = default; + ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) noexcept; + ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; + ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; + ImageInfo(const AmdGpu::Image& image) noexcept; + + bool IsTiled() const { + return tiling_mode != AmdGpu::TilingMode::Display_Linear; + } + bool IsBlockCoded() const; + bool IsPacked() const; + bool IsDepthStencil() const; + + struct { + VAddr cmask_addr; + VAddr fmask_addr; + VAddr htile_addr; + } meta_info{}; + + struct { + u32 texture : 1; + u32 storage : 1; + u32 render_target : 1; + u32 
depth_target : 1; + u32 stencil : 1; + u32 vo_buffer : 1; + } usage{}; // Usage data tracked during image lifetime + + bool is_cube = false; + bool is_volume = false; + bool is_tiled = false; + bool is_read_only = false; + vk::Format pixel_format = vk::Format::eUndefined; + vk::ImageType type = vk::ImageType::e1D; + SubresourceExtent resources; + Extent3D size{1, 1, 1}; + u32 num_samples = 1; + u32 pitch = 0; + AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear}; + std::vector> mips_layout; + VAddr guest_address{0}; + u32 guest_size_bytes{0}; +}; + +} // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8cd6f893..192bd9ce 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -89,7 +89,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& ImageInfo info; info.pixel_format = vk::Format::eR8G8B8A8Unorm; info.type = vk::ImageType::e2D; - const ImageId null_id = slot_images.insert(instance, scheduler, info, 0); + const ImageId null_id = slot_images.insert(instance, scheduler, info); ASSERT(null_id.index == 0); ImageViewInfo view_info; @@ -112,26 +112,27 @@ void TextureCache::OnCpuWrite(VAddr address) { }); } -ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) { +ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { std::unique_lock lock{m_page_table}; boost::container::small_vector image_ids; - ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { - // Address and width must match. 
- if (image.cpu_addr != cpu_address || image.info.size.width != info.size.width) { - return; - } - if (info.IsDepthStencil() != image.info.IsDepthStencil() && - info.pixel_format != vk::Format::eR32Sfloat) { - return; - } - image_ids.push_back(image_id); - }); + ForEachImageInRegion( + info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { + // Address and width must match. + if (image.cpu_addr != info.guest_address || image.info.size.width != info.size.width) { + return; + } + if (info.IsDepthStencil() != image.info.IsDepthStencil() && + info.pixel_format != vk::Format::eR32Sfloat) { + return; + } + image_ids.push_back(image_id); + }); ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!"); ImageId image_id{}; if (image_ids.empty()) { - image_id = slot_images.insert(instance, scheduler, info, cpu_address); + image_id = slot_images.insert(instance, scheduler, info); RegisterImage(image_id); } else { image_id = image_ids[0]; @@ -169,9 +170,9 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) { +ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage) { const ImageInfo info{desc}; - const ImageId image_id = FindImage(info, desc.Address()); + const ImageId image_id = FindImage(info); Image& image = slot_images[image_id]; auto& usage = image.info.usage; @@ -190,10 +191,10 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag return RegisterImageView(image_id, view_info); } -ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint) { +ImageView& TextureCache::FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; - const ImageId image_id = FindImage(info, 
buffer.Address()); + const ImageId image_id = FindImage(info); Image& image = slot_images[image_id]; image.flags &= ~ImageFlagBits::CpuModified; @@ -207,11 +208,12 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff return RegisterImageView(image_id, view_info); } -ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, - VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint, - bool write_enabled) { - const ImageInfo info{buffer, htile_address, hint}; - const ImageId image_id = FindImage(info, buffer.Address(), false); +ImageView& TextureCache::FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + u32 num_slices, VAddr htile_address, + const AmdGpu::Liverpool::CbDbExtent& hint, + bool write_enabled) { + const ImageInfo info{buffer, num_slices, htile_address, hint}; + const ImageId image_id = FindImage(info, false); Image& image = slot_images[image_id]; image.flags &= ~ImageFlagBits::CpuModified; @@ -244,21 +246,24 @@ void TextureCache::RefreshImage(Image& image) { return; } + ASSERT(image.info.resources.levels == image.info.mips_layout.size()); const u8* image_data = reinterpret_cast(image.cpu_addr); for (u32 m = 0; m < image.info.resources.levels; m++) { - const u32 width = image.info.size.width >> m; - const u32 height = image.info.size.height >> m; - const u32 map_size = width * height * image.info.resources.layers; + const u32 width = std::max(image.info.size.width >> m, 1u); + const u32 height = std::max(image.info.size.height >> m, 1u); + const u32 depth = image.info.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const u32 map_size = image.info.mips_layout[m].second * image.info.resources.layers; // Upload data to the staging buffer. 
const auto [data, offset, _] = staging.Map(map_size, 16); if (image.info.is_tiled) { ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode()); } else { - std::memcpy(data, image_data, map_size); + std::memcpy(data, + image_data + image.info.mips_layout[m].first * image.info.resources.layers, + map_size); } staging.Commit(map_size); - image_data += map_size; // Copy to the image. const vk::BufferImageCopy image_copy = { @@ -272,7 +277,7 @@ void TextureCache::RefreshImage(Image& image) { .layerCount = u32(image.info.resources.layers), }, .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, 1}, + .imageExtent = {width, height, depth}, }; scheduler.EndRendering(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 421651ff..8a618983 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -47,20 +47,21 @@ public: /// Invalidates any image in the logical page range. void OnCpuWrite(VAddr address); - /// Retrieves the image handle of the image with the provided attributes and address. - [[nodiscard]] ImageId FindImage(const ImageInfo& info, VAddr cpu_address, - bool refresh_on_create = true); + /// Retrieves the image handle of the image with the provided attributes. + [[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true); /// Retrieves an image view with the properties of the specified image descriptor. 
- [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage); + [[nodiscard]] ImageView& FindTexture(const AmdGpu::Image& image, bool is_storage); /// Retrieves the render target with specified properties - [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint); - [[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, - VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint, - bool write_enabled); + [[nodiscard]] ImageView& FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint); + + /// Retrieves the depth target with specified properties + [[nodiscard]] ImageView& FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + u32 num_slices, VAddr htile_address, + const AmdGpu::Liverpool::CbDbExtent& hint, + bool write_enabled); /// Reuploads image contents. void RefreshImage(Image& image); diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index c780ca60..e097ba3e 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -19,10 +19,6 @@ namespace VideoCore { -static u32 IntLog2(u32 i) { - return 31 - __builtin_clz(i | 1u); -} - class TileManager32 { public: u32 m_macro_tile_height = 0; @@ -81,8 +77,8 @@ public: static u32 getBankIdx(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) { - const u32 x_shift_offset = IntLog2(bank_width * num_pipes); - const u32 y_shift_offset = IntLog2(bank_height); + const u32 x_shift_offset = std::bit_width(bank_width * num_pipes) - 1; + const u32 y_shift_offset = std::bit_width(bank_height) - 1; const u32 xs = x >> x_shift_offset; const u32 ys = y >> y_shift_offset; u32 bank = 0; @@ -210,8 +206,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { const DetilerContext* 
TileManager::GetDetiler(const Image& image) const { const auto format = DemoteImageFormatForDetiling(image.info.pixel_format); - if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled || - image.info.tiling_mode == AmdGpu::TilingMode::Depth_MicroTiled) { + if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled) { switch (format) { case vk::Format::eR8Uint: return &detilers[DetilerType::Micro8x1];