texture_cache: images overlap support

This commit is contained in:
psucien 2024-08-30 23:13:20 +02:00
parent ca1613258f
commit cd06d79fca
7 changed files with 299 additions and 47 deletions

View File

@ -242,6 +242,74 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
}
void Image::CopyImage(const Image& image) {
scheduler->EndRendering();
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
auto cmdbuf = scheduler->CommandBuffer();
boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
for (u32 m = 0; m < image.info.resources.levels; ++m) {
const auto mip_w = std::max(info.size.width >> m, 1u);
const auto mip_h = std::max(info.size.height >> m, 1u);
const auto mip_d = std::max(info.size.depth >> m, 1u);
image_copy.emplace_back(vk::ImageCopy{
.srcSubresource{
.aspectMask = image.aspect_mask,
.mipLevel = m,
.baseArrayLayer = 0,
.layerCount = image.info.resources.layers,
},
.dstSubresource{
.aspectMask = image.aspect_mask,
.mipLevel = m,
.baseArrayLayer = 0,
.layerCount = image.info.resources.layers,
},
.extent = {mip_w, mip_h, mip_d},
});
}
cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
}
void Image::CopyMip(const Image& image, u32 mip) {
scheduler->EndRendering();
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
auto cmdbuf = scheduler->CommandBuffer();
const auto mip_w = std::max(info.size.width >> mip, 1u);
const auto mip_h = std::max(info.size.height >> mip, 1u);
const auto mip_d = std::max(info.size.depth >> mip, 1u);
ASSERT(mip_w == image.info.size.width);
ASSERT(mip_h == image.info.size.height);
const vk::ImageCopy image_copy{
.srcSubresource{
.aspectMask = image.aspect_mask,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = image.info.resources.layers,
},
.dstSubresource{
.aspectMask = image.aspect_mask,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = info.resources.layers,
},
.extent = {mip_w, mip_h, mip_d},
};
cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy);
Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
}
Image::~Image() = default;
} // namespace VideoCore

View File

@ -32,6 +32,7 @@ enum ImageFlagBits : u32 {
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
Deleted = 1 << 9, ///< Indicates that images was marked for deletion once frame is done
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@ -95,6 +96,9 @@ struct Image {
vk::CommandBuffer cmdbuf = {});
void Upload(vk::Buffer buffer, u64 offset);
void CopyImage(const Image& image);
void CopyMip(const Image& image, u32 mip);
const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler;
ImageInfo info;
@ -112,6 +116,7 @@ struct Image {
vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
boost::container::small_vector<u64, 14> mip_hashes;
u64 tick_accessed_last{0};
};
} // namespace VideoCore

View File

@ -174,6 +174,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const auto color_slice_sz = buffer.GetColorSliceSize();
guest_size_bytes = color_slice_sz * buffer.NumSlices();
mips_layout.emplace_back(color_slice_sz, pitch, 0);
tiling_idx = static_cast<u32>(buffer.attrib.tile_mode_index.Value());
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
@ -287,4 +288,74 @@ void ImageInfo::UpdateSize() {
guest_size_bytes *= resources.layers;
}
/// Tells whether this image can be treated as a single mip level of `info`.
///
/// All of the following must hold:
///  - both descriptions pass IsCompatible();
///  - this image has exactly one mip level;
///  - the candidate level, `info.resources.levels - resources.levels`, is at least 1;
///  - this image's width and height equal those of that level of `info`;
///  - the image types are equal, or `info` is 3D while this image is 2D with as many layers
///    as that level of `info` has depth slices;
///  - `info.mips_layout` has an entry for that level whose size equals `guest_size_bytes`.
bool ImageInfo::IsMipOf(const ImageInfo& info) const {
    // Currently we expect only one level to be copied.
    if (!IsCompatible(info) || resources.levels != 1) {
        return false;
    }

    const int mip = info.resources.levels - resources.levels;
    if (mip < 1) {
        return false;
    }

    const bool width_matches = size.width == std::max(info.size.width >> mip, 1u);
    const bool height_matches = size.height == std::max(info.size.height >> mip, 1u);
    if (!(width_matches && height_matches)) {
        return false;
    }

    const bool array_into_volume =
        info.type == vk::ImageType::e3D && type == vk::ImageType::e2D;
    if (array_into_volume) {
        // In case of 2D array to 3D copy, make sure we have proper number of layers.
        if (resources.layers != std::max(info.size.depth >> mip, 1u)) {
            return false;
        }
    } else if (type != info.type) {
        return false;
    }

    // The level must exist in the parent's layout and occupy exactly as many bytes as we do.
    const auto& parent_mips = info.mips_layout;
    return parent_mips.size() > mip && parent_mips[mip].size == guest_size_bytes;
}
/// Tells whether this image can be treated as one or more array slices of `info`.
///
/// All of the following must hold:
///  - both descriptions pass IsCompatible();
///  - both images have the same type, width and height;
///  - this image's byte size is a whole multiple of the byte size of one layer of `info`;
///  - the address difference between the two images is a multiple of this image's byte size.
/// Returns false (instead of dividing by zero) when `info` reports zero layers, when one of its
/// layers would be zero bytes, or when this image is zero bytes.
bool ImageInfo::IsSliceOf(const ImageInfo& info) const {
    if (!IsCompatible(info)) {
        return false;
    }
    // Array slices should be of the same type.
    if (type != info.type) {
        return false;
    }
    // 2D dimensions of both images should be the same.
    if ((size.width != info.size.width) || (size.height != info.size.height)) {
        return false;
    }
    // Both values are used as divisors below.
    if (info.resources.layers == 0 || guest_size_bytes == 0) {
        return false;
    }
    // Check for size alignment. The slice size must stay an integer: storing the quotient in a
    // bool would reduce it to 0 or 1 and make the modulo below meaningless.
    const auto slice_size = info.guest_size_bytes / info.resources.layers;
    if (slice_size == 0 || guest_size_bytes % slice_size != 0) {
        return false;
    }
    // Ensure that address is aligned too.
    // NOTE(review): if the addresses are unsigned and this image starts above `info`, the
    // difference wraps around before the modulo — confirm the intended operand order.
    if (((info.guest_address - guest_address) % guest_size_bytes) != 0) {
        return false;
    }
    return true;
}
} // namespace VideoCore

View File

@ -29,6 +29,15 @@ struct ImageInfo {
bool IsPacked() const;
bool IsDepthStencil() const;
bool IsMipOf(const ImageInfo& info) const;
bool IsSliceOf(const ImageInfo& info) const;
/// Verifies if images are compatible for subresource merging: pixel format, tiling index,
/// sample count and bit count must all be equal.
bool IsCompatible(const ImageInfo& info) const {
    if (pixel_format != info.pixel_format) {
        return false;
    }
    if (tiling_idx != info.tiling_idx) {
        return false;
    }
    if (num_samples != info.num_samples) {
        return false;
    }
    return num_bits == info.num_bits;
}
void UpdateSize();
struct {

View File

@ -13,6 +13,7 @@
namespace VideoCore {
static constexpr u64 PageShift = 12;
static constexpr u64 NumFramesBeforeRemoval = 32;
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
BufferCache& buffer_cache_, PageManager& tracker_)
@ -43,7 +44,7 @@ void TextureCache::InvalidateMemory(VAddr address, size_t size) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
UntrackImage(image, image_id);
UntrackImage(image_id);
});
}
@ -53,46 +54,151 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
boost::container::small_vector<ImageId, 16> deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id);
}
// TODO: Download image data back to host.
UnregisterImage(id);
DeleteImage(id);
FreeImage(id);
}
}
/// Decides what to do with one cached image that overlaps the requested image.
///
/// @param image_info       Description of the image being requested (or merged so far).
/// @param cache_image_id   Cached image overlapping the requested range.
/// @param merged_image_id  Image selected by earlier overlap resolution, may be null.
/// @return The id to carry forward: the id of an expanded image, `merged_image_id` unchanged,
///         or a null id when a left overlap is seen before any merge target exists.
///
/// Cases, in order:
///  - Same start address, different size: the cached image is freed if it has not been accessed
///    for more than NumFramesBeforeRemoval ticks; `merged_image_id` is returned either way.
///  - Same start address, same size: if the pixel format matches and the request is larger in
///    bytes, the cached image is expanded; otherwise `merged_image_id` is returned.
///  - Requested address above the cached one (right overlap): nothing to do here.
///  - Requested address below the cached one (left overlap): if the cached image is a mip of
///    the request it is copied into the merge target and freed.
ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_image_id,
                                     ImageId merged_image_id) {
    auto& cached_image = slot_images[cache_image_id];
    const auto& cached_info = cached_image.info;

    if (image_info.guest_address == cached_info.guest_address) { // Equal address
        if (image_info.size != cached_info.size) {
            // Very likely this kind of overlap is caused by allocation from a pool. We can
            // assume it is safe to delete the image if it wasn't accessed in some amount of
            // frames.
            const auto idle_ticks = scheduler.CurrentTick() - cached_image.tick_accessed_last;
            if (idle_ticks > NumFramesBeforeRemoval) {
                FreeImage(cache_image_id);
            }
            return merged_image_id;
        }

        // Sizes are equal from here on; only a same-format, strictly larger request expands.
        const bool can_expand = image_info.pixel_format == cached_info.pixel_format &&
                                image_info.guest_size_bytes > cached_info.guest_size_bytes;
        if (!can_expand) {
            return merged_image_id;
        }

        ImageId expanded_id{};
        if (image_info.type != cached_info.type) {
            UNREACHABLE();
        } else {
            expanded_id = ExpandImage(image_info, cache_image_id);
        }
        return expanded_id;
    }

    // Right overlap, the image requested is a possible subresource of the image from cache.
    if (image_info.guest_address > cached_info.guest_address) {
        // Should be handled by view. No additional actions needed.
        return merged_image_id;
    }

    // Left overlap, the image from cache is a possible subresource of the image requested.
    if (!merged_image_id) {
        // We need to have a larger, already allocated image to copy this one into.
        return {};
    }

    if (cached_info.IsMipOf(image_info)) {
        cached_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
                             vk::AccessFlagBits::eTransferRead);

        const auto num_mips_to_copy = cached_info.resources.levels;
        ASSERT(num_mips_to_copy == 1);

        auto& merge_target = slot_images[merged_image_id];
        merge_target.CopyMip(cached_image, image_info.resources.levels - 1);

        FreeImage(cache_image_id);
    }

    // Slice merging is not implemented. Note this check also runs after the mip path above.
    if (cached_info.IsSliceOf(image_info)) {
        UNREACHABLE();
    }

    return merged_image_id;
}
/// Replaces a cached image with a newly created one described by `info`, carrying its
/// contents over.
///
/// A new image is inserted and registered, the old image is switched to the transfer-source
/// layout and copied into the new one with CopyImage, and the old image is freed. The new image
/// is then tracked and its CpuModified flag is cleared.
/// @param info      Description of the image to create.
/// @param image_id  Cached image whose contents are copied; it is freed by this call.
/// @return Id of the newly created image.
ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
    const auto expanded_id = slot_images.insert(instance, scheduler, info);
    RegisterImage(expanded_id);

    // Both images are looked up after the insertion above.
    auto& old_image = slot_images[image_id];
    auto& expanded_image = slot_images[expanded_id];

    old_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
    expanded_image.CopyImage(old_image);

    FreeImage(image_id);

    TrackImage(expanded_id);
    expanded_image.flags &= ~ImageFlagBits::CpuModified;
    return expanded_id;
}
/// Finds the cached image that backs `info`, creating and registering one if none fits.
///
/// Returns NULL_IMAGE_VIEW_ID when the guest address is 0. Otherwise the cache mutex is taken,
/// the images in the requested guest range are collected, an exact match (same address, byte
/// size and dimensions) is preferred, remaining overlaps are passed to ResolveOverlap, and a
/// new image is inserted when no id results. The access tick of the returned image is updated.
///
/// NOTE(review): this listing is a diff view — lines of the previous lookup (address/width
/// match, depth-stencil filter, picking from `image_ids` by index) appear next to their
/// replacements. Confirm the exact statement set against the applied file.
ImageId TextureCache::FindImage(const ImageInfo& info) {
if (info.guest_address == 0) [[unlikely]] {
return NULL_IMAGE_VIEW_ID;
}
std::unique_lock lock{mutex};
boost::container::small_vector<ImageId, 2> image_ids;
boost::container::small_vector<ImageId, 8> image_ids;
ForEachImageInRegion(
info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
// Address and width must match.
if (image.cpu_addr != info.guest_address || image.info.size.width != info.size.width) {
// Ignore images scheduled for deletion
if (True(image.flags & ImageFlagBits::Deleted)) {
return;
}
if (info.IsDepthStencil() != image.info.IsDepthStencil() &&
info.pixel_format != vk::Format::eR32Sfloat) {
// Check if image is fully outside of the region
const auto in_image_cpu_addr = info.guest_address;
const auto in_image_cpu_addr_end = info.guest_address + info.guest_size_bytes;
if (in_image_cpu_addr_end <= image.cpu_addr) {
return;
}
if (in_image_cpu_addr >= image.cpu_addr_end) {
return;
}
image_ids.push_back(image_id);
});
// ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
ImageId image_id{};
if (image_ids.empty()) {
// Check for a perfect match first
for (const auto& cache_id : image_ids) {
auto& cache_image = slot_images[cache_id];
if (cache_image.info.guest_address == info.guest_address &&
cache_image.info.guest_size_bytes == info.guest_size_bytes &&
cache_image.info.size == info.size) {
ASSERT(cache_image.info.type == info.type);
ASSERT(cache_image.info.num_bits == info.num_bits);
image_id = cache_id;
break;
}
}
// Try to resolve overlaps (if any)
if (!image_id) {
for (const auto& cache_id : image_ids) {
// While no id has been produced yet the request itself is the merge description;
// afterwards the info of the image returned by the previous step is used.
const auto& merged_info = image_id ? slot_images[image_id].info : info;
image_id = ResolveOverlap(merged_info, cache_id, image_id);
}
}
// Create and register a new image
if (!image_id) {
image_id = slot_images.insert(instance, scheduler, info);
RegisterImage(image_id);
} else {
image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
}
// Remember when the image was last requested; ResolveOverlap compares this tick against
// NumFramesBeforeRemoval before freeing a same-address image of a different size.
slot_images[image_id].tick_accessed_last = scheduler.CurrentTick();
return image_id;
}
@ -135,31 +241,7 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo&
usage.texture = true;
}
// These changes are temporary and should be removed once texture cache will handle subresources
// merging
auto view_info_tmp = view_info;
if (view_info_tmp.range.base.level > image.info.resources.levels - 1 ||
view_info_tmp.range.base.layer > image.info.resources.layers - 1 ||
view_info_tmp.range.extent.levels > image.info.resources.levels ||
view_info_tmp.range.extent.layers > image.info.resources.layers) {
LOG_DEBUG(Render_Vulkan,
"Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})",
view_info_tmp.range.base.level, view_info_tmp.range.extent.levels,
view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers,
image.info.resources.levels, image.info.resources.layers);
view_info_tmp.range.base.level =
std::min(view_info_tmp.range.base.level, image.info.resources.levels - 1);
view_info_tmp.range.base.layer =
std::min(view_info_tmp.range.base.layer, image.info.resources.layers - 1);
view_info_tmp.range.extent.levels =
std::min(view_info_tmp.range.extent.levels, image.info.resources.levels);
view_info_tmp.range.extent.layers =
std::min(view_info_tmp.range.extent.layers, image.info.resources.layers);
}
return RegisterImageView(image_id, view_info_tmp);
return RegisterImageView(image_id, view_info);
}
ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
@ -335,7 +417,8 @@ void TextureCache::UnregisterImage(ImageId image_id) {
});
}
void TextureCache::TrackImage(Image& image, ImageId image_id) {
void TextureCache::TrackImage(ImageId image_id) {
auto& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Tracked)) {
return;
}
@ -343,7 +426,8 @@ void TextureCache::TrackImage(Image& image, ImageId image_id) {
tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
}
void TextureCache::UntrackImage(Image& image, ImageId image_id) {
void TextureCache::UntrackImage(ImageId image_id) {
auto& image = slot_images[image_id];
if (False(image.flags & ImageFlagBits::Tracked)) {
return;
}
@ -356,6 +440,8 @@ void TextureCache::DeleteImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
image.flags |= ImageFlagBits::Deleted;
// Remove any registered meta areas.
const auto& meta_info = image.info.meta_info;
if (meta_info.cmask_addr) {

View File

@ -65,8 +65,13 @@ public:
return;
}
RefreshImage(image, custom_scheduler);
TrackImage(image, image_id);
TrackImage(image_id);
}
[[nodiscard]] ImageId ResolveOverlap(const ImageInfo& info, ImageId cache_img_id,
ImageId merged_image_id);
[[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id);
/// Reuploads image contents.
void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr);
@ -167,14 +172,20 @@ private:
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(Image& image, ImageId image_id);
void TrackImage(ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(Image& image, ImageId image_id);
void UntrackImage(ImageId image_id);
/// Removes the image and any views/surface metas that reference it.
void DeleteImage(ImageId image_id);
/// Releases an image in one step: untracks it, unregisters it, then deletes it.
/// The call order is required: DeleteImage asserts that the image is no longer tracked and no
/// longer registered. UntrackImage returns early when the image is not tracked, so this is
/// usable whether or not the image is currently tracked.
void FreeImage(ImageId image_id) {
UntrackImage(image_id);
UnregisterImage(image_id);
DeleteImage(image_id);
}
private:
const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler;

View File

@ -36,6 +36,8 @@ struct Extent3D {
u32 width;
u32 height;
u32 depth;
auto operator<=>(const Extent3D&) const = default;
};
struct SubresourceLayers {