From cd06d79fcad6183e294cdcdb72d66ef1ef5ccd46 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 30 Aug 2024 23:13:20 +0200 Subject: [PATCH] texture_cache: images overlap support --- src/video_core/texture_cache/image.cpp | 68 +++++++ src/video_core/texture_cache/image.h | 5 + src/video_core/texture_cache/image_info.cpp | 71 +++++++ src/video_core/texture_cache/image_info.h | 9 + .../texture_cache/texture_cache.cpp | 174 +++++++++++++----- src/video_core/texture_cache/texture_cache.h | 17 +- src/video_core/texture_cache/types.h | 2 + 7 files changed, 299 insertions(+), 47 deletions(-) diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 0b725655..0ba40939 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -242,6 +242,74 @@ void Image::Upload(vk::Buffer buffer, u64 offset) { vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); } +void Image::CopyImage(const Image& image) { + scheduler->EndRendering(); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + auto cmdbuf = scheduler->CommandBuffer(); + + boost::container::small_vector image_copy{}; + for (u32 m = 0; m < image.info.resources.levels; ++m) { + const auto mip_w = std::max(info.size.width >> m, 1u); + const auto mip_h = std::max(info.size.height >> m, 1u); + const auto mip_d = std::max(info.size.depth >> m, 1u); + + image_copy.emplace_back(vk::ImageCopy{ + .srcSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .dstSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .extent = {mip_w, mip_h, mip_d}, + }); + } + cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + + Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | 
vk::AccessFlagBits::eTransferRead); +} + +void Image::CopyMip(const Image& image, u32 mip) { + scheduler->EndRendering(); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + auto cmdbuf = scheduler->CommandBuffer(); + + const auto mip_w = std::max(info.size.width >> mip, 1u); + const auto mip_h = std::max(info.size.height >> mip, 1u); + const auto mip_d = std::max(info.size.depth >> mip, 1u); + + ASSERT(mip_w == image.info.size.width); + ASSERT(mip_h == image.info.size.height); + + const vk::ImageCopy image_copy{ + .srcSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .dstSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = info.resources.layers, + }, + .extent = {mip_w, mip_h, mip_d}, + }; + cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + + Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); +} + Image::~Image() = default; } // namespace VideoCore diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 3df8ddb7..f932b25a 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -32,6 +32,7 @@ enum ImageFlagBits : u32 { Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered + Deleted = 1 << 9, ///< Indicates the image was marked for deletion once the frame is done }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -95,6 +96,9 @@ struct Image { vk::CommandBuffer cmdbuf = {}); void Upload(vk::Buffer buffer, u64 offset); + void CopyImage(const Image& image); + void CopyMip(const Image& image, u32 mip); + const Vulkan::Instance* instance; Vulkan::Scheduler* scheduler; 
ImageInfo info; @@ -112,6 +116,7 @@ struct Image { vk::Flags access_mask = vk::AccessFlagBits::eNone; vk::ImageLayout layout = vk::ImageLayout::eUndefined; boost::container::small_vector mip_hashes; + u64 tick_accessed_last{0}; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 4ac4aee8..14fbd4b9 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -174,6 +174,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const auto color_slice_sz = buffer.GetColorSliceSize(); guest_size_bytes = color_slice_sz * buffer.NumSlices(); mips_layout.emplace_back(color_slice_sz, pitch, 0); + tiling_idx = static_cast(buffer.attrib.tile_mode_index.Value()); } ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, @@ -287,4 +288,74 @@ void ImageInfo::UpdateSize() { guest_size_bytes *= resources.layers; } +bool ImageInfo::IsMipOf(const ImageInfo& info) const { + if (!IsCompatible(info)) { + return false; + } + + // Currently we expect only one level to be copied. + if (resources.levels != 1) { + return false; + } + + const int mip = info.resources.levels - resources.levels; + if (mip < 1) { + return false; + } + + const auto mip_w = std::max(info.size.width >> mip, 1u); + const auto mip_h = std::max(info.size.height >> mip, 1u); + if ((size.width != mip_w) || (size.height != mip_h)) { + return false; + } + + const auto mip_d = std::max(info.size.depth >> mip, 1u); + if (info.type == vk::ImageType::e3D && type == vk::ImageType::e2D) { + // In case of 2D array to 3D copy, make sure we have proper number of layers. + if (resources.layers != mip_d) { + return false; + } + } else { + if (type != info.type) { + return false; + } + } + + // Check if the mip has correct size. 
+ if (info.mips_layout.size() <= mip || info.mips_layout[mip].size != guest_size_bytes) { + return false; + } + + return true; +} + +bool ImageInfo::IsSliceOf(const ImageInfo& info) const { + if (!IsCompatible(info)) { + return false; + } + + // Array slices should be of the same type. + if (type != info.type) { + return false; + } + + // 2D dimensions of both images should be the same. + if ((size.width != info.size.width) || (size.height != info.size.height)) { + return false; + } + + // Check for size alignment. + const auto slice_size = info.guest_size_bytes / info.resources.layers; + if (guest_size_bytes % slice_size != 0) { + return false; + } + + // Ensure that address is aligned too. + if (((info.guest_address - guest_address) % guest_size_bytes) != 0) { + return false; + } + + return true; +} + } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index ddad318d..84d551f0 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -29,6 +29,15 @@ struct ImageInfo { bool IsPacked() const; bool IsDepthStencil() const; + bool IsMipOf(const ImageInfo& info) const; + bool IsSliceOf(const ImageInfo& info) const; + + /// Verifies if images are compatible for subresource merging. 
+ bool IsCompatible(const ImageInfo& info) const { + return (pixel_format == info.pixel_format && tiling_idx == info.tiling_idx && + num_samples == info.num_samples && num_bits == info.num_bits); + } + void UpdateSize(); struct { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 3354a8ec..8e5f7003 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -13,6 +13,7 @@ namespace VideoCore { static constexpr u64 PageShift = 12; +static constexpr u64 NumFramesBeforeRemoval = 32; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache& buffer_cache_, PageManager& tracker_) @@ -43,7 +44,7 @@ void TextureCache::InvalidateMemory(VAddr address, size_t size) { // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. - UntrackImage(image, image_id); + UntrackImage(image_id); }); } @@ -53,46 +54,151 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { boost::container::small_vector deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } // TODO: Download image data back to host. - UnregisterImage(id); - DeleteImage(id); + FreeImage(id); } } +ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_image_id, + ImageId merged_image_id) { + auto& tex_cache_image = slot_images[cache_image_id]; + + if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address + if (image_info.size != tex_cache_image.info.size) { + // Very likely this kind of overlap is caused by allocation from a pool. 
We can assume + // it is safe to delete the image if it wasn't accessed in some amount of frames. + if (scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > + NumFramesBeforeRemoval) { + + FreeImage(cache_image_id); + } + return merged_image_id; + } + + if (image_info.pixel_format != tex_cache_image.info.pixel_format || + image_info.size != tex_cache_image.info.size || + image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) { + return merged_image_id; + } + + ImageId new_image_id{}; + if (image_info.type == tex_cache_image.info.type) { + new_image_id = ExpandImage(image_info, cache_image_id); + } else { + UNREACHABLE(); + } + return new_image_id; + } + + // Right overlap, the image requested is a possible subresource of the image from cache. + if (image_info.guest_address > tex_cache_image.info.guest_address) { + // Should be handled by view. No additional actions needed. + } else { + // Left overlap, the image from cache is a possible subresource of the image requested + if (!merged_image_id) { + // We need to have a larger, already allocated image to copy this one into + return {}; + } + + if (tex_cache_image.info.IsMipOf(image_info)) { + tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits::eTransferRead); + + const auto num_mips_to_copy = tex_cache_image.info.resources.levels; + ASSERT(num_mips_to_copy == 1); + + auto& merged_image = slot_images[merged_image_id]; + merged_image.CopyMip(tex_cache_image, image_info.resources.levels - 1); + + FreeImage(cache_image_id); + } + + if (tex_cache_image.info.IsSliceOf(image_info)) { + UNREACHABLE(); + } + } + + return merged_image_id; +} + +ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { + + const auto new_image_id = slot_images.insert(instance, scheduler, info); + RegisterImage(new_image_id); + + auto& src_image = slot_images[image_id]; + auto& new_image = slot_images[new_image_id]; + + 
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + new_image.CopyImage(src_image); + + FreeImage(image_id); + + TrackImage(new_image_id); + new_image.flags &= ~ImageFlagBits::CpuModified; + return new_image_id; +} + ImageId TextureCache::FindImage(const ImageInfo& info) { if (info.guest_address == 0) [[unlikely]] { return NULL_IMAGE_VIEW_ID; } std::unique_lock lock{mutex}; - boost::container::small_vector image_ids; + boost::container::small_vector image_ids; ForEachImageInRegion( info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { - // Address and width must match. - if (image.cpu_addr != info.guest_address || image.info.size.width != info.size.width) { + // Ignore images scheduled for deletion + if (True(image.flags & ImageFlagBits::Deleted)) { return; } - if (info.IsDepthStencil() != image.info.IsDepthStencil() && - info.pixel_format != vk::Format::eR32Sfloat) { + + // Check if image is fully outside of the region + const auto in_image_cpu_addr = info.guest_address; + const auto in_image_cpu_addr_end = info.guest_address + info.guest_size_bytes; + if (in_image_cpu_addr_end <= image.cpu_addr) { return; } + if (in_image_cpu_addr >= image.cpu_addr_end) { + return; + } + image_ids.push_back(image_id); }); - // ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!"); - ImageId image_id{}; - if (image_ids.empty()) { + + // Check for a perfect match first + for (const auto& cache_id : image_ids) { + auto& cache_image = slot_images[cache_id]; + + if (cache_image.info.guest_address == info.guest_address && + cache_image.info.guest_size_bytes == info.guest_size_bytes && + cache_image.info.size == info.size) { + + ASSERT(cache_image.info.type == info.type); + ASSERT(cache_image.info.num_bits == info.num_bits); + image_id = cache_id; + break; + } + } + + // Try to resolve overlaps (if any) + if (!image_id) { + for (const auto& cache_id : image_ids) { + const auto& merged_info = image_id ? 
slot_images[image_id].info : info; + image_id = ResolveOverlap(merged_info, cache_id, image_id); + } + } + + // Create and register a new image + if (!image_id) { image_id = slot_images.insert(instance, scheduler, info); RegisterImage(image_id); - } else { - image_id = image_ids[image_ids.size() > 1 ? 1 : 0]; } + slot_images[image_id].tick_accessed_last = scheduler.CurrentTick(); + return image_id; } @@ -135,31 +241,7 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& usage.texture = true; } - // These changes are temporary and should be removed once texture cache will handle subresources - // merging - auto view_info_tmp = view_info; - if (view_info_tmp.range.base.level > image.info.resources.levels - 1 || - view_info_tmp.range.base.layer > image.info.resources.layers - 1 || - view_info_tmp.range.extent.levels > image.info.resources.levels || - view_info_tmp.range.extent.layers > image.info.resources.layers) { - - LOG_DEBUG(Render_Vulkan, - "Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})", - view_info_tmp.range.base.level, view_info_tmp.range.extent.levels, - view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers, - image.info.resources.levels, image.info.resources.layers); - - view_info_tmp.range.base.level = - std::min(view_info_tmp.range.base.level, image.info.resources.levels - 1); - view_info_tmp.range.base.layer = - std::min(view_info_tmp.range.base.layer, image.info.resources.layers - 1); - view_info_tmp.range.extent.levels = - std::min(view_info_tmp.range.extent.levels, image.info.resources.levels); - view_info_tmp.range.extent.layers = - std::min(view_info_tmp.range.extent.layers, image.info.resources.layers); - } - - return RegisterImageView(image_id, view_info_tmp); + return RegisterImageView(image_id, view_info); } ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, @@ -335,7 +417,8 @@ void TextureCache::UnregisterImage(ImageId image_id) { }); } -void 
TextureCache::TrackImage(Image& image, ImageId image_id) { +void TextureCache::TrackImage(ImageId image_id) { + auto& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::Tracked)) { return; } @@ -343,7 +426,8 @@ void TextureCache::TrackImage(Image& image, ImageId image_id) { tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1); } -void TextureCache::UntrackImage(Image& image, ImageId image_id) { +void TextureCache::UntrackImage(ImageId image_id) { + auto& image = slot_images[image_id]; if (False(image.flags & ImageFlagBits::Tracked)) { return; } @@ -356,6 +440,8 @@ void TextureCache::DeleteImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + image.flags |= ImageFlagBits::Deleted; + // Remove any registered meta areas. const auto& meta_info = image.info.meta_info; if (meta_info.cmask_addr) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 31b1e393..03e289c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -65,8 +65,13 @@ public: return; } RefreshImage(image, custom_scheduler); - TrackImage(image, image_id); + TrackImage(image_id); } + + [[nodiscard]] ImageId ResolveOverlap(const ImageInfo& info, ImageId cache_img_id, + ImageId merged_image_id); + + [[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id); /// Reuploads image contents. 
void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr); @@ -167,14 +172,20 @@ private: void UnregisterImage(ImageId image); /// Track CPU reads and writes for image - void TrackImage(Image& image, ImageId image_id); + void TrackImage(ImageId image_id); /// Stop tracking CPU reads and writes for image - void UntrackImage(Image& image, ImageId image_id); + void UntrackImage(ImageId image_id); /// Removes the image and any views/surface metas that reference it. void DeleteImage(ImageId image_id); + void FreeImage(ImageId image_id) { + UntrackImage(image_id); + UnregisterImage(image_id); + DeleteImage(image_id); + } + private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 45ffe251..bcef1935 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -36,6 +36,8 @@ struct Extent3D { u32 width; u32 height; u32 depth; + + auto operator<=>(const Extent3D&) const = default; }; struct SubresourceLayers {