From b4d24d873769c6dd39b6b69f85e455517d1c8d46 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 1 Jul 2024 02:11:53 +0300 Subject: [PATCH] renderer_vulkan: Prefer depth stencil read-only layout when possible * Persona reads a depth attachment while it is being attached with writes disabled. Now this works without spamming vk validation errors --- .../backend/spirv/emit_spirv_image.cpp | 2 +- src/video_core/amdgpu/liverpool.h | 8 +++ .../renderer_vulkan/renderer_vulkan.h | 6 ++- .../renderer_vulkan/vk_compute_pipeline.cpp | 13 ++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 15 +++--- .../renderer_vulkan/vk_instance.cpp | 1 + .../renderer_vulkan/vk_rasterizer.cpp | 48 +++++++++++------- .../renderer_vulkan/vk_scheduler.cpp | 2 +- src/video_core/renderer_vulkan/vk_scheduler.h | 7 ++- .../renderer_vulkan/vk_stream_buffer.cpp | 2 +- src/video_core/texture_cache/image.cpp | 2 +- src/video_core/texture_cache/image_view.cpp | 3 +- src/video_core/texture_cache/image_view.h | 1 + .../texture_cache/texture_cache.cpp | 49 +++++++++++-------- src/video_core/texture_cache/texture_cache.h | 12 ++--- 15 files changed, 106 insertions(+), 65 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 13db664c..2058e7b7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -26,7 +26,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; if (Sirit::ValidId(offset)) { - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.Add(spv::ImageOperandsMask::ConstOffset, offset); } return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 4767e59f..5261857a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -528,6 +528,14 @@ struct Liverpool { BitField<0, 15, s32> bottom_right_x; BitField<15, 15, s32> bottom_right_y; }; + + u32 GetWidth() const { + return bottom_right_x - top_left_x; + } + + u32 GetHeight() const { + return bottom_right_y - top_left_y; + } }; struct ViewportDepth { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 523ff05b..bf5d220b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -41,7 +41,8 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { const auto info = VideoCore::ImageInfo{attribute}; - auto& image = texture_cache.FindImage(info, cpu_address); + const auto image_id = texture_cache.FindImage(info, cpu_address); + auto& image = texture_cache.GetImage(image_id); return PrepareFrameInternal(image); } @@ -54,7 +55,8 @@ public: const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); const auto info = VideoCore::ImageInfo{attribute}; - return texture_cache.FindImage(info, cpu_address); + const auto image_id = texture_cache.FindImage(info, cpu_address); + return texture_cache.GetImage(image_id); } bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index d7e68712..2a2e4472 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -125,17 +125,18 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s } } - for (const auto& image : info.images) { - const auto tsharp = info.ReadUd(image.sgpr_base, image.dword_offset); - const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral); + for (const auto& image_desc : info.images) { + const auto tsharp = info.ReadUd(image_desc.sgpr_base, image_desc.dword_offset); + const auto& image_view = texture_cache.FindImageView(tsharp, image_desc.is_storage); + const auto& image = texture_cache.GetImage(image_view.image_id); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, .dstBinding = binding++, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, + .descriptorType = image_desc.is_storage ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .pImageInfo = &image_infos.back(), }); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d3a7df05..880a0aad 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -348,19 +348,18 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& } } - for (const auto& image : stage.images) { - const auto tsharp = stage.ReadUd(image.sgpr_base, image.dword_offset); - const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, - (image.is_storage || image.is_depth) ? vk::ImageLayout::eGeneral - : vk::ImageLayout::eShaderReadOnlyOptimal); + for (const auto& image_desc : stage.images) { + const auto tsharp = stage.ReadUd(image_desc.sgpr_base, image_desc.dword_offset); + const auto& image_view = texture_cache.FindImageView(tsharp, image_desc.is_storage); + const auto& image = texture_cache.GetImage(image_view.image_id); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, .dstBinding = binding++, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, + .descriptorType = image_desc.is_storage ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .pImageInfo = &image_infos.back(), }); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index b6cffc1a..99f289e4 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -203,6 +203,7 @@ bool Instance::CreateDevice() { .independentBlend = true, .geometryShader = features.geometryShader, .logicOp = features.logicOp, + .multiViewport = true, .samplerAnisotropy = features.samplerAnisotropy, .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics, .shaderImageGatherExtended = true, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 82a938ae..0f024b7d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -44,10 +44,11 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } + UpdateDynamicState(*pipeline); + pipeline->BindResources(memory, vertex_index_buffer, texture_cache); BeginRendering(); - UpdateDynamicState(*pipeline); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); if (is_indexed) { @@ -113,12 +114,14 @@ void Rasterizer::BeginRendering() { const bool is_clear = regs.depth_render_control.depth_clear_enable || texture_cache.IsMetaCleared(htile_address); const auto& hint = liverpool->last_db_extent; - const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint); + const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint, + regs.depth_control.depth_write_enable); + const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, hint.width); state.height = std::min(state.height, hint.height); state.depth_attachment = { .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eGeneral, + .imageLayout = image.layout, .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore, .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, @@ -192,23 +195,34 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { void Rasterizer::UpdateViewportScissorState() { auto& regs = liverpool->regs; + boost::container::static_vector viewports; + boost::container::static_vector scissors; + const float reduce_z = regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW ? 1.0f : 0.0f; + for (u32 i = 0; i < Liverpool::NumViewports; i++) { + const auto& vp = regs.viewports[i]; + const auto& vp_d = regs.viewport_depths[i]; + if (vp.xscale == 0) { + continue; + } + viewports.push_back({ + .x = vp.xoffset - vp.xscale, + .y = vp.yoffset - vp.yscale, + .width = vp.xscale * 2.0f, + .height = vp.yscale * 2.0f, + .minDepth = vp.zoffset - vp.zscale * reduce_z, + .maxDepth = vp.zscale + vp.zoffset, + }); + } + const auto& sc = regs.screen_scissor; + scissors.push_back({ + .offset = {sc.top_left_x, sc.top_left_y}, + .extent = {sc.GetWidth(), sc.GetHeight()}, + }); const auto cmdbuf = scheduler.CommandBuffer(); - const vk::Viewport viewport{ - .x = regs.viewports[0].xoffset - regs.viewports[0].xscale, - .y = regs.viewports[0].yoffset - regs.viewports[0].yscale, - .width = regs.viewports[0].xscale * 2.0f, - .height = regs.viewports[0].yscale * 2.0f, - .minDepth = regs.viewports[0].zoffset - regs.viewports[0].zscale * reduce_z, - .maxDepth = regs.viewports[0].zscale + regs.viewports[0].zoffset, - }; - const vk::Rect2D scissor{ - .offset = {regs.screen_scissor.top_left_x, regs.screen_scissor.top_left_y}, - .extent = {regs.screen_scissor.GetWidth(), regs.screen_scissor.GetHeight()}, - }; - cmdbuf.setViewport(0, viewport); - cmdbuf.setScissor(0, scissor); + cmdbuf.setViewport(0, viewports); + cmdbuf.setScissor(0, scissors); } void Rasterizer::UpdateDepthStencilState() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 51cb4690..dea5d72d 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -32,7 +32,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { .extent = {render_state.width, render_state.height}, }, .layerCount = 1, - .colorAttachmentCount = static_cast(render_state.color_attachments.size()), + .colorAttachmentCount = render_state.num_color_attachments, .pColorAttachments = render_state.color_attachments.data(), .pDepthAttachment = render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 7bf50622..21a8ceaf 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -23,7 +23,7 @@ struct RenderState { bool operator==(const RenderState& other) const noexcept { return std::memcmp(this, &other, sizeof(RenderState)) == 0; - } + } }; class Scheduler { @@ -46,6 +46,11 @@ public: /// Ends current rendering scope. void EndRendering(); + /// Returns the current render state. + const RenderState& GetRenderState() const { + return render_state; + } + /// Returns the current command buffer. vk::CommandBuffer CommandBuffer() const { return current_cmdbuf; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 116f7896..44447241 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { auto& watch = previous_watches[wait_cursor]; wait_bound = watch.upper_bound; - scheduler.Wait(watch.tick); + //scheduler.Wait(watch.tick); ++wait_cursor; } } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 0345ea0f..427f7e4b 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -267,7 +267,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, if (info.is_tiled) { ImageViewInfo view_info; view_info.format = DemoteImageFormatForDetiling(info.pixel_format); - view_for_detiler.emplace(*instance, view_info, *this); + view_for_detiler.emplace(*instance, view_info, *this, ImageId{}); } Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index ba45c493..bef4aa19 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -71,8 +71,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, } ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, + ImageId image_id_, std::optional usage_override /*= {}*/) - : info{info_} { + : info{info_}, image_id{image_id_} { vk::ImageViewUsageCreateInfo usage_ci{}; if (usage_override) { usage_ci.usage = usage_override.value(); diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 6b567ed1..52e6c83c 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -36,6 +36,7 @@ struct Image; struct ImageView { explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, + ImageId image_id, std::optional usage_override = {}); ~ImageView(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index fdfab4ea..f17854a7 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -93,7 +93,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& ASSERT(null_id.index == 0); ImageViewInfo view_info; - void(slot_image_views.insert(instance, view_info, slot_images[null_id])); + void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id)); } TextureCache::~TextureCache() { @@ -112,7 +112,7 @@ void TextureCache::OnCpuWrite(VAddr address) { }); } -Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) { +ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) { std::unique_lock lock{m_page_table}; boost::container::small_vector image_ids; ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { @@ -140,16 +140,16 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool re RegisterMeta(info, image_id); Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified) && - (!image_ids.empty() || refresh_on_create)) { + if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { RefreshImage(image); TrackImage(image, image_id); } - return image; + return image_id; } -ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& view_info) { +ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo& view_info) { + Image& image = slot_images[image_id]; if (const ImageViewId view_id = image.FindView(view_info); view_id) { return slot_image_views[view_id]; } @@ -162,32 +162,37 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi usage_override = image.usage & ~vk::ImageUsageFlagBits::eStorage; } - const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, usage_override); + const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, image_id, usage_override); image.image_view_infos.emplace_back(view_info); image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; } -ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage, bool is_depth) { +ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) { const ImageInfo info{desc}; - Image& image = FindImage(info, desc.Address()); + const ImageId image_id = FindImage(info, desc.Address()); + Image& image = slot_images[image_id]; + auto& usage = image.info.usage; - if (is_storage || is_depth) { + if (is_storage) { image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); - image.info.usage.storage = true; + usage.storage = true; } else { - image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead); - image.info.usage.texture = true; + const auto new_layout = image.info.IsDepthStencil() ? vk::ImageLayout::eDepthStencilReadOnlyOptimal + : vk::ImageLayout::eShaderReadOnlyOptimal; + image.Transit(new_layout, vk::AccessFlagBits::eShaderRead); + usage.texture = true; } const ImageViewInfo view_info{desc, is_storage}; - return RegisterImageView(image, view_info); + return RegisterImageView(image_id, view_info); } ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; - auto& image = FindImage(info, buffer.Address(), false); + const ImageId image_id = FindImage(info, buffer.Address(), false); + Image& image = slot_images[image_id]; image.flags &= ~ImageFlagBits::CpuModified; image.Transit(vk::ImageLayout::eColorAttachmentOptimal, @@ -197,17 +202,21 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff image.info.usage.render_target = true; ImageViewInfo view_info{buffer, !!image.info.usage.vo_buffer}; - return RegisterImageView(image, view_info); + return RegisterImageView(image_id, view_info); } ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint) { + const AmdGpu::Liverpool::CbDbExtent& hint, + bool write_enabled) { const ImageInfo info{buffer, htile_address, hint}; - auto& image = FindImage(info, buffer.Address(), false); + const ImageId image_id = FindImage(info, buffer.Address(), false); + Image& image = slot_images[image_id]; image.flags &= ~ImageFlagBits::CpuModified; - image.Transit(vk::ImageLayout::eGeneral, + const auto new_layout = write_enabled ? vk::ImageLayout::eDepthStencilAttachmentOptimal + : vk::ImageLayout::eDepthStencilReadOnlyOptimal; + image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite | vk::AccessFlagBits::eDepthStencilAttachmentRead); @@ -215,7 +224,7 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe ImageViewInfo view_info; view_info.format = info.pixel_format; - return RegisterImageView(image, view_info); + return RegisterImageView(image_id, view_info); } void TextureCache::RefreshImage(Image& image) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 00374697..421651ff 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -48,19 +48,19 @@ public: void OnCpuWrite(VAddr address); /// Retrieves the image handle of the image with the provided attributes and address. - [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address, - bool refresh_on_create = true); + [[nodiscard]] ImageId FindImage(const ImageInfo& info, VAddr cpu_address, + bool refresh_on_create = true); /// Retrieves an image view with the properties of the specified image descriptor. - [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage, - bool is_depth); + [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage); /// Retrieves the render target with specified properties [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint); [[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint); + const AmdGpu::Liverpool::CbDbExtent& hint, + bool write_enabled); /// Reuploads image contents. void RefreshImage(Image& image); @@ -95,7 +95,7 @@ public: } private: - ImageView& RegisterImageView(Image& image, const ImageViewInfo& view_info); + ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info); /// Iterate over all page indices in a range template