diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 59387faf..04281007 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -185,13 +185,16 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddCapability(spv::Capability::Int16); } ctx.AddCapability(spv::Capability::Int64); - if (info.has_storage_images) { + if (info.has_storage_images || info.has_image_buffers) { ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); } if (info.has_texel_buffers) { ctx.AddCapability(spv::Capability::SampledBuffer); } + if (info.has_image_buffers) { + ctx.AddCapability(spv::Capability::ImageBuffer); + } switch (program.info.stage) { case Stage::Compute: { const std::array workgroup_size{program.info.workgroup_size}; diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 82d99f99..c61494a6 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -30,9 +30,11 @@ void Visit(Info& info, IR::Inst& inst) { info.has_storage_images = true; break; case IR::Opcode::LoadBufferFormatF32: - case IR::Opcode::StoreBufferFormatF32: info.has_texel_buffers = true; break; + case IR::Opcode::StoreBufferFormatF32: + info.has_image_buffers = true; + break; case IR::Opcode::QuadShuffle: info.uses_group_quad = true; break; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 62318a20..b8f888d2 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -238,6 +238,7 @@ struct Info { u64 pgm_hash{}; u32 shared_memory_size{}; bool has_storage_images{}; + bool has_image_buffers{}; bool has_texel_buffers{}; bool has_discard{}; bool has_image_gather{}; diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index e9dceb62..68a4aa52 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -13,13 +13,6 @@ namespace VideoCore { -constexpr vk::BufferUsageFlags AllFlags = - vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer | - vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | - vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eIndirectBuffer; - std::string_view BufferTypeName(MemoryUsage type) { switch (type) { case MemoryUsage::Upload: @@ -96,13 +89,13 @@ void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usa } Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_addr_, - u64 size_bytes_) + vk::BufferUsageFlags flags, u64 size_bytes_) : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, usage{usage_}, buffer{instance->GetDevice(), instance->GetAllocator()} { // Create buffer object. const vk::BufferCreateInfo buffer_ci = { .size = size_bytes, - .usage = AllFlags, + .usage = flags, }; VmaAllocationInfo alloc_info{}; buffer.Create(buffer_ci, usage, &alloc_info); @@ -119,25 +112,33 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_ is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } -vk::BufferView Buffer::View(u32 offset, u32 size, AmdGpu::DataFormat dfmt, +vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt, AmdGpu::NumberFormat nfmt) { - const auto it{std::ranges::find_if(views, [offset, size, dfmt, nfmt](const BufferView& view) { - return offset == view.offset && size == view.size && dfmt == view.dfmt && nfmt == view.nfmt; + const auto it{std::ranges::find_if(views, [=](const BufferView& view) { + return offset == view.offset && size == view.size && is_written == view.is_written && + dfmt == view.dfmt && nfmt == view.nfmt; })}; if (it != views.end()) { return it->handle; } + const vk::BufferUsageFlags2CreateInfoKHR usage_flags = { + .usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer + : vk::BufferUsageFlagBits2KHR::eUniformTexelBuffer, + }; + const vk::BufferViewCreateInfo view_ci = { + .pNext = &usage_flags, + .buffer = buffer.buffer, + .format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt), + .offset = offset, + .range = size, + }; views.push_back({ .offset = offset, .size = size, + .is_written = is_written, .dfmt = dfmt, .nfmt = nfmt, - .handle = instance->GetDevice().createBufferView({ - .buffer = buffer.buffer, - .format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt), - .offset = offset, - .range = size, - }), + .handle = instance->GetDevice().createBufferView(view_ci), }); return views.back().handle; } @@ -147,7 +148,7 @@ constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_, MemoryUsage usage, u64 size_bytes) - : Buffer{instance, usage, 0, size_bytes}, scheduler{scheduler_} { + : Buffer{instance, usage, 0, AllFlags, size_bytes}, scheduler{scheduler_} { ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); const auto device = instance.GetDevice(); diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index 87293c76..7bcd0609 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -31,6 +31,15 @@ enum class MemoryUsage { Stream, ///< Requests device local host visible buffer, falling back host memory. }; +constexpr vk::BufferUsageFlags ReadFlags = + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer | + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer; + +constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageTexelBuffer | + vk::BufferUsageFlagBits::eStorageBuffer; + struct UniqueBuffer { explicit UniqueBuffer(vk::Device device, VmaAllocator allocator); ~UniqueBuffer(); @@ -65,7 +74,7 @@ struct UniqueBuffer { class Buffer { public: explicit Buffer(const Vulkan::Instance& instance, MemoryUsage usage, VAddr cpu_addr_, - u64 size_bytes_); + vk::BufferUsageFlags flags, u64 size_bytes_); Buffer& operator=(const Buffer&) = delete; Buffer(const Buffer&) = delete; @@ -73,7 +82,8 @@ public: Buffer& operator=(Buffer&&) = default; Buffer(Buffer&&) = default; - vk::BufferView View(u32 offset, u32 size, AmdGpu::DataFormat dfmt, AmdGpu::NumberFormat nfmt); + vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt, + AmdGpu::NumberFormat nfmt); /// Increases the likeliness of this being a stream buffer void IncreaseStreamScore(int score) noexcept { @@ -121,6 +131,7 @@ public: struct BufferView { u32 offset; u32 size; + bool is_written; AmdGpu::DataFormat dfmt; AmdGpu::NumberFormat nfmt; vk::BufferView handle; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 02d6b2ce..b838989f 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -23,7 +23,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, memory_tracker{&tracker} { // Ensure the first slot is used for the null buffer - void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, 1)); + void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, ReadFlags, 1)); } BufferCache::~BufferCache() = default; @@ -421,7 +421,7 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); const u32 size = static_cast(overlap.end - overlap.begin); const BufferId new_buffer_id = - slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, size); + slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size); auto& new_buffer = slot_buffers[new_buffer_id]; const size_t size_bytes = new_buffer.SizeBytes(); const auto cmdbuf = scheduler.CommandBuffer(); @@ -495,7 +495,8 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size) } else { // For large one time transfers use a temporary host buffer. // RenderDoc can lag quite a bit if the stream buffer is too large. - Buffer temp_buffer{instance, MemoryUsage::Upload, 0, total_size_bytes}; + Buffer temp_buffer{instance, MemoryUsage::Upload, 0, vk::BufferUsageFlagBits::eTransferSrc, + total_size_bytes}; src_buffer = temp_buffer.Handle(); u8* const staging = temp_buffer.mapped_data.data(); for (auto& copy : copies) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index a2590670..71cc9815 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -172,8 +172,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, ASSERT(adjust % fmt_stride == 0); push_data.AddOffset(binding, adjust / fmt_stride); } - buffer_view = vk_buffer->View(offset_aligned, size + adjust, vsharp.GetDataFmt(), - vsharp.GetNumberFmt()); + buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written, + vsharp.GetDataFmt(), vsharp.GetNumberFmt()); } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6e7c7782..f56de831 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -422,8 +422,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, ASSERT(adjust % fmt_stride == 0); push_data.AddOffset(binding, adjust / fmt_stride); } - buffer_view = vk_buffer->View(offset, size + adjust, vsharp.GetDataFmt(), - vsharp.GetNumberFmt()); + buffer_view = vk_buffer->View(offset, size + adjust, tex_buffer.is_written, + vsharp.GetDataFmt(), vsharp.GetNumberFmt()); } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 19939a32..1be331a7 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -178,7 +178,7 @@ bool Instance::CreateDevice() { return false; } - boost::container::static_vector enabled_extensions; + boost::container::static_vector enabled_extensions; const auto add_extension = [&](std::string_view extension) -> bool { const auto result = std::find_if(available_extensions.begin(), available_extensions.end(), @@ -217,6 +217,7 @@ bool Instance::CreateDevice() { // with extensions. tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); + const bool maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); @@ -299,6 +300,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceMaintenance4FeaturesKHR{ .maintenance4 = true, }, + vk::PhysicalDeviceMaintenance5FeaturesKHR{ + .maintenance5 = true, + }, vk::PhysicalDeviceDynamicRenderingFeaturesKHR{ .dynamicRendering = true, }, @@ -344,6 +348,9 @@ bool Instance::CreateDevice() { if (!maintenance4) { device_chain.unlink(); } + if (!maintenance5) { + device_chain.unlink(); + } if (!custom_border_color) { device_chain.unlink(); }