diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 24940a98..267e8f4a 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -248,6 +248,19 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b return {&buffer, buffer.Offset(device_addr)}; } +std::pair BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) { + const u64 page = gpu_addr >> CACHING_PAGEBITS; + const BufferId buffer_id = page_table[page]; + if (buffer_id) { + const Buffer& buffer = slot_buffers[buffer_id]; + if (buffer.IsInBounds(gpu_addr, size)) { + return {&buffer, buffer.Offset(gpu_addr)}; + } + } + const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); + return {&staging_buffer, offset}; +} + bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { const VAddr end_addr = addr + size; const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); @@ -272,6 +285,10 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { return memory_tracker.IsRegionCpuModified(addr, size); } +bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { + return memory_tracker.IsRegionGpuModified(addr, size); +} + BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { if (device_addr == 0) { return NULL_BUFFER_ID; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0dee87cf..33ea3f86 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -69,12 +69,18 @@ public: /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written); + /// Obtains a temporary buffer for usage in texture cache. + [[nodiscard]] std::pair ObtainTempBuffer(VAddr gpu_addr, u32 size); + /// Return true when a region is registered on the cache [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + private: template void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 8178c88d..113b380e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -47,7 +47,7 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, false); + const auto image_id = texture_cache.FindImage(info); auto& image = texture_cache.GetImage(image_id); return PrepareFrameInternal(image, is_eop); } @@ -61,7 +61,7 @@ public: const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, false); + const auto image_id = texture_cache.FindImage(info); return texture_cache.GetImage(image_id); } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f1148760..bae4b89d 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -117,6 +117,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { + mip_hashes.resize(info.resources.levels); ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // the texture cache should re-create the resource with the usage requested diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index b18f1002..5a888346 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -111,6 +111,7 @@ struct Image { vk::Flags pl_stage = vk::PipelineStageFlagBits::eAllCommands; vk::Flags access_mask = vk::AccessFlagBits::eNone; vk::ImageLayout layout = vk::ImageLayout::eUndefined; + boost::container::small_vector mip_hashes; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 4bf23139..40c47271 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -4,6 +4,7 @@ #include #include "common/assert.h" #include "video_core/page_manager.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" @@ -11,13 +12,11 @@ namespace VideoCore { -static constexpr u64 StreamBufferSize = 512_MB; static constexpr u64 PageShift = 12; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, - staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize}, tile_manager{instance, scheduler} { ImageInfo info; info.pixel_format = vk::Format::eR8G8B8A8Unorm; @@ -60,7 +59,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { } } -ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { +ImageId TextureCache::FindImage(const ImageInfo& info) { if (info.guest_address == 0) [[unlikely]] { return NULL_IMAGE_VIEW_ID; } @@ -90,12 +89,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { image_id = image_ids[image_ids.size() > 1 ? 1 : 0]; } - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { - RefreshImage(image); - TrackImage(image, image_id); - } - return image_id; } @@ -122,6 +115,7 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(info); + UpdateImage(image_id); Image& image = slot_images[image_id]; auto& usage = image.info.usage; @@ -168,7 +162,8 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; - image.flags &= ~ImageFlagBits::CpuModified; + image.flags |= ImageFlagBits::GpuModified; + UpdateImage(image_id); image.Transit(vk::ImageLayout::eColorAttachmentOptimal, vk::AccessFlagBits::eColorAttachmentWrite | @@ -201,8 +196,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(image_info, false); + const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; + image.flags |= ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::CpuModified; const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal @@ -231,22 +227,6 @@ void TextureCache::RefreshImage(Image& image) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; - scheduler.EndRendering(); - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - vk::Buffer buffer{staging.Handle()}; - u32 offset{0}; - - auto upload_buffer = tile_manager.TryDetile(image); - if (upload_buffer) { - buffer = *upload_buffer; - } else { - // Upload data to the staging buffer. - offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16); - } - const auto& num_layers = image.info.resources.layers; const auto& num_mips = image.info.resources.levels; ASSERT(num_mips == image.info.mips_layout.size()); @@ -257,23 +237,58 @@ void TextureCache::RefreshImage(Image& image) { const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + + // Protect GPU modified resources from accidental reuploads. + if (True(image.flags & ImageFlagBits::GpuModified) && + !buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) { + const u8* addr = std::bit_cast(image.info.guest_address); + const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size); + if (image.mip_hashes[m] == hash) { + continue; + } + image.mip_hashes[m] = hash; + } image_copy.push_back({ - .bufferOffset = offset + mip_ofs * num_layers, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), + .bufferOffset = mip_ofs * num_layers, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), .imageSubresource{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = m, - .baseArrayLayer = 0, - .layerCount = num_layers, + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = num_layers, }, .imageOffset = {0, 0, 0}, .imageExtent = {width, height, depth}, }); } + if (image_copy.empty()) { + return; + } + + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); + + const VAddr image_addr = image.info.guest_address; + const size_t image_size = image.info.guest_size_bytes; + vk::Buffer buffer{}; + u32 offset{}; + if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) { + buffer = *upload_buffer; + } else { + const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size); + buffer = vk_buffer->Handle(); + offset = buf_offset; + } + + for (auto& copy : image_copy) { + copy.bufferOffset += offset; + } + cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b2a5f1d4..b3af0ff1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -44,7 +44,7 @@ public: void UnmapMemory(VAddr cpu_addr, size_t size); /// Retrieves the image handle of the image with the provided attributes. - [[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true); + [[nodiscard]] ImageId FindImage(const ImageInfo& info); /// Retrieves an image view with the properties of the specified image descriptor. [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, @@ -58,6 +58,16 @@ public: [[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info, const ImageViewInfo& view_info); + /// Updates image contents if it was modified by CPU. + void UpdateImage(ImageId image_id) { + Image& image = slot_images[image_id]; + if (False(image.flags & ImageFlagBits::CpuModified)) { + return; + } + RefreshImage(image); + TrackImage(image, image_id); + } + /// Reuploads image contents. void RefreshImage(Image& image); @@ -170,7 +180,6 @@ private: Vulkan::Scheduler& scheduler; BufferCache& buffer_cache; PageManager& tracker; - StreamBuffer staging; TileManager tile_manager; Common::SlotVector slot_images; Common::SlotVector slot_image_views; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 75fa378c..6447fde1 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -5,7 +5,6 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/texture_cache/image_view.h" -#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" #include "video_core/host_shaders/detile_m32x1_comp.h"