From d05cbd88bb2252ba7051d7c115575eb744da0ef8 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 30 May 2024 09:42:28 +0200 Subject: [PATCH 1/7] video_core: renderer_vulkan: support for passes w\o a fs --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4b65fe07..44bb61bf 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -144,18 +144,21 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }, }; - u32 shader_count = 2; + u32 shader_count = 1; std::array shader_stages; shader_stages[0] = vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, .module = modules[0], .pName = "main", }; - shader_stages[1] = vk::PipelineShaderStageCreateInfo{ - .stage = vk::ShaderStageFlagBits::eFragment, - .module = modules[4], - .pName = "main", - }; + if (modules[4]) { + shader_stages[1] = vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = modules[4], + .pName = "main", + }; + ++shader_count; + } const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); const u32 num_color_formats = std::distance(key.color_formats.begin(), it); From 3741f013a34f7e7df9e10f00989c2d1a26e7f983 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 30 May 2024 09:43:49 +0200 Subject: [PATCH 2/7] shader_recompiler: added `NOP` and `RSQ` instructions --- .../backend/spirv/emit_spirv_floating_point.cpp | 4 ++++ .../backend/spirv/emit_spirv_instructions.h | 1 + src/shader_recompiler/frontend/translate/translate.cpp | 4 ++++ src/shader_recompiler/frontend/translate/translate.h | 1 + src/shader_recompiler/frontend/translate/vector_alu.cpp | 5 +++++ src/shader_recompiler/ir/ir_emitter.cpp | 4 ++++ 
src/shader_recompiler/ir/ir_emitter.h | 1 + src/shader_recompiler/ir/opcodes.inc | 1 + 8 files changed, 21 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e56eb916..f526aaba 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -122,6 +122,10 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) { return ctx.OpSqrt(ctx.F32[1], value); } +Id EmitFPInvSqrt(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); +} + Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 2192b054..8972ff4a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -174,6 +174,7 @@ Id EmitFPRecip64(EmitContext& ctx, Id value); Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPInvSqrt(EmitContext& ctx, Id value); Id EmitFPSaturate16(EmitContext& ctx, Id value); Id EmitFPSaturate32(EmitContext& ctx, Id value); Id EmitFPSaturate64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 1e9925fc..6136b46a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -321,9 +321,13 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MAX_F32: translator.V_MAX_F32(inst); break; + case Opcode::V_RSQ_F32: + translator.V_RSQ_F32(inst); + 
break; case Opcode::S_ANDN2_B64: translator.S_ANDN2_B64(inst); break; + case Opcode::S_NOP: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: case Opcode::S_MOV_B64: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 8a027e9f..6e50e8fb 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -67,6 +67,7 @@ public: void V_FMA_F32(const GcnInst& inst); void V_CMP_F32(ConditionOp op, const GcnInst& inst); void V_MAX_F32(const GcnInst& inst); + void V_RSQ_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7bb97f01..bdb69672 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -193,4 +193,9 @@ void Translator::V_MAX_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(src0, src1)); } +void Translator::V_RSQ_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPInvSqrt(src0)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 43e8e439..beacc2fe 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -609,6 +609,10 @@ F32 IREmitter::FPSqrt(const F32& value) { return Inst(Opcode::FPSqrt, value); } +F32 IREmitter::FPInvSqrt(const F32& value) { + return Inst(Opcode::FPInvSqrt, value); +} + F32F64 IREmitter::FPSaturate(const F32F64& value) { switch (value.Type()) { case Type::F32: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index a52437a9..771e9b13 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ 
b/src/shader_recompiler/ir/ir_emitter.h @@ -123,6 +123,7 @@ public: [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); + [[nodiscard]] F32 FPInvSqrt(const F32& value); [[nodiscard]] F32F64 FPSaturate(const F32F64& value); [[nodiscard]] F32F64 FPClamp(const F32F64& value, const F32F64& min_value, const F32F64& max_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 5fb4dd0f..5a30142f 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -142,6 +142,7 @@ OPCODE(FPRecip64, F64, F64, OPCODE(FPRecipSqrt32, F32, F32, ) OPCODE(FPRecipSqrt64, F64, F64, ) OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPInvSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) OPCODE(FPExp2, F32, F32, ) OPCODE(FPCos, F32, F32, ) From 9227a2b86844b98b810ab02dd483c7fba5b7a688 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 30 May 2024 11:21:26 +0200 Subject: [PATCH 3/7] video_core: multiple color attachments support --- src/video_core/amdgpu/liverpool.h | 4 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 5 ++-- .../renderer_vulkan/vk_rasterizer.cpp | 25 ++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index ac507667..442a66f2 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -631,6 +631,10 @@ struct Liverpool { u32 clear_word1; INSERT_PADDING_WORDS(2); + operator bool() const { + return info.format != DataFormat::FormatInvalid; + } + u32 Pitch() const { return (pitch.tile_max + 1) << 3; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9b4e6856..d4a38308 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -101,9 +101,8 @@ void 
PipelineCache::RefreshGraphicsKey() { : vk::Format::eUndefined; for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) { const auto& cb = regs.color_buffers[i]; - key.color_formats[i] = cb.base_address - ? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) - : vk::Format::eUndefined; + key.color_formats[i] = cb ? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) + : vk::Format::eUndefined; } for (u32 i = 0; i < MaxShaderStages; i++) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ded491c4..5bdb443d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -39,14 +39,21 @@ void Rasterizer::Draw(bool is_indexed) { const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); pipeline->BindResources(memory, vertex_index_buffer, texture_cache); - const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + boost::container::static_vector + color_attachments{}; + for (const auto& col_buf : regs.color_buffers) { + if (!col_buf) { + continue; + } + const auto& image_view = texture_cache.RenderTarget(col_buf); - const vk::RenderingAttachmentInfo color_info = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eGeneral, - .loadOp = vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - }; + color_attachments.push_back({ + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eGeneral, + .loadOp = vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + }); + } // TODO: Don't restart renderpass every draw const auto& scissor = regs.screen_scissor; @@ -57,8 +64,8 @@ void Rasterizer::Draw(bool is_indexed) { .extent = {scissor.GetWidth(), scissor.GetHeight()}, }, .layerCount = 1, - .colorAttachmentCount = 1, - .pColorAttachments = &color_info, + .colorAttachmentCount = static_cast(color_attachments.size()), + 
.pColorAttachments = color_attachments.data(), }; UpdateDynamicState(); From 0212e6875280f62e2cfc5da78fed3d65d8da313a Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 30 May 2024 11:50:42 +0200 Subject: [PATCH 4/7] shader_recompiler: redundant IR opcode removed --- .../backend/spirv/emit_spirv_floating_point.cpp | 4 ---- src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 1 - src/shader_recompiler/frontend/translate/vector_alu.cpp | 2 +- src/shader_recompiler/ir/ir_emitter.cpp | 4 ---- src/shader_recompiler/ir/ir_emitter.h | 1 - src/shader_recompiler/ir/opcodes.inc | 1 - 6 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index f526aaba..e56eb916 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -122,10 +122,6 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) { return ctx.OpSqrt(ctx.F32[1], value); } -Id EmitFPInvSqrt(EmitContext& ctx, Id value) { - return ctx.OpInverseSqrt(ctx.F32[1], value); -} - Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 8972ff4a..2192b054 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -174,7 +174,6 @@ Id EmitFPRecip64(EmitContext& ctx, Id value); Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPInvSqrt(EmitContext& ctx, Id value); Id EmitFPSaturate16(EmitContext& ctx, Id value); Id EmitFPSaturate32(EmitContext& ctx, Id value); Id 
EmitFPSaturate64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index bdb69672..44394013 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -195,7 +195,7 @@ void Translator::V_MAX_F32(const GcnInst& inst) { void Translator::V_RSQ_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0], true)}; - SetDst(inst.dst[0], ir.FPInvSqrt(src0)); + SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index beacc2fe..43e8e439 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -609,10 +609,6 @@ F32 IREmitter::FPSqrt(const F32& value) { return Inst(Opcode::FPSqrt, value); } -F32 IREmitter::FPInvSqrt(const F32& value) { - return Inst(Opcode::FPInvSqrt, value); -} - F32F64 IREmitter::FPSaturate(const F32F64& value) { switch (value.Type()) { case Type::F32: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 771e9b13..a52437a9 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -123,7 +123,6 @@ public: [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); - [[nodiscard]] F32 FPInvSqrt(const F32& value); [[nodiscard]] F32F64 FPSaturate(const F32F64& value); [[nodiscard]] F32F64 FPClamp(const F32F64& value, const F32F64& min_value, const F32F64& max_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 5a30142f..5fb4dd0f 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -142,7 +142,6 @@ OPCODE(FPRecip64, F64, F64, OPCODE(FPRecipSqrt32, F32, F32, ) 
OPCODE(FPRecipSqrt64, F64, F64, ) OPCODE(FPSqrt, F32, F32, ) -OPCODE(FPInvSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) OPCODE(FPExp2, F32, F32, ) OPCODE(FPCos, F32, F32, ) From 767e13cccb65bd9180742b10c5c2fda63c71f1b9 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 30 May 2024 12:09:33 +0200 Subject: [PATCH 5/7] Linux build fix --- src/video_core/texture_cache/texture_cache.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 658cfa41..e42a0bbd 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -15,6 +15,7 @@ #define PAGE_NOACCESS PROT_NONE #define PAGE_READWRITE (PROT_READ | PROT_WRITE) +#define PAGE_READONLY PROT_READ #else #include From 8e6d49252404d5ad85f327fb6938ae8cc8fe8a7d Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 30 May 2024 12:55:37 +0200 Subject: [PATCH 6/7] video_core: renderer_vulkan: added color mask support --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 5 +++++ src/video_core/renderer_vulkan/vk_instance.cpp | 13 +++++++++++++ src/video_core/renderer_vulkan/vk_instance.h | 6 ++++++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 13 +++++++++++++ 4 files changed, 37 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 44bb61bf..31255a63 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -113,6 +113,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul vk::DynamicState::eBlendConstants, }; + if (instance.IsColorWriteEnableSupported()) { + dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT); + dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT); + } + const vk::PipelineDynamicStateCreateInfo dynamic_info = { .dynamicStateCount = 
static_cast(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d35d35d1..578a8654 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -115,6 +115,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT, + vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>(); const vk::StructureChain properties_chain = physical_device.getProperties2(); } + if (!color_write_en) { + device_chain.unlink(); + } + try { device = physical_device.createDeviceUnique(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 5e1be6ce..e4ee9aa4 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -111,6 +111,11 @@ public: return external_memory_host; } + /// Returns true when VK_EXT_color_write_enable is supported + bool IsColorWriteEnableSupported() const { + return color_write_en; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -218,6 +223,7 @@ private: bool fragment_shader_barycentric{}; bool shader_stencil_export{}; bool external_memory_host{}; + bool color_write_en{}; u64 min_imported_host_pointer_alignment{}; bool tooling_info{}; bool debug_utils_supported{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5bdb443d..fa973ed8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -138,6 +138,19 
@@ void Rasterizer::UpdateDynamicState() { auto& regs = liverpool->regs; const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.setBlendConstants(&regs.blend_constants.red); + + if (instance.IsColorWriteEnableSupported()) { + std::array write_en{}; + std::array write_mask{}; + for (int col_buf_idx = 0; col_buf_idx < Liverpool::NumColorBuffers; ++col_buf_idx) { + const auto mask = regs.color_target_mask.raw >> (col_buf_idx * 4); + write_en[col_buf_idx] = mask ? vk::True : vk::False; + write_mask[col_buf_idx] = vk::ColorComponentFlags{mask}; + } + + cmdbuf.setColorWriteEnableEXT(write_en); + cmdbuf.setColorWriteMaskEXT(0, write_mask); + } } void Rasterizer::UpdateViewportScissorState() { From 365df2a3ac67617e4a0771f3dadc61c1f0109606 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 30 May 2024 16:27:39 +0200 Subject: [PATCH 7/7] video_core: renderer_vulkan: write mask fallback --- src/video_core/amdgpu/liverpool.h | 4 ++++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 7 +++++-- .../renderer_vulkan/vk_graphics_pipeline.h | 5 +++++ .../renderer_vulkan/vk_instance.cpp | 7 +++++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 20 ++++++++++++++----- .../renderer_vulkan/vk_rasterizer.cpp | 19 ++++++++---------- .../renderer_vulkan/vk_rasterizer.h | 2 +- 7 files changed, 43 insertions(+), 21 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 442a66f2..9c2b4bcd 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -416,6 +416,10 @@ struct Liverpool { BitField<20, 4, u32> output5_mask; BitField<24, 4, u32> output6_mask; BitField<28, 4, u32> output7_mask; + + [[nodiscard]] u8 GetMask(int buf_id) const { + return (raw >> (buf_id * 4)) & 0xffu; + } }; struct IndexBufferBase { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 31255a63..655dc692 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ 
b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -185,8 +185,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .srcAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.alpha_src_factor), .dstAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor), .alphaBlendOp = LiverpoolToVK::BlendOp(control.alpha_func), - .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + .colorWriteMask = + instance.IsColorWriteEnableSupported() + ? vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + : key.write_masks[i], }; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index c7e773ad..17ed225b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -39,6 +39,7 @@ struct GraphicsPipelineKey { Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; std::array blend_controls; + std::array write_masks; bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0; @@ -66,6 +67,10 @@ public: return key.stage_hashes[0] == EmbeddedVsHash; } + [[nodiscard]] auto GetWriteMasks() const { + return key.write_masks; + } + private: void BuildDescSetLayout(); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 578a8654..3cfe8c79 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -151,10 +151,11 @@ bool Instance::CreateDevice() { tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); index_type_uint8 = 
add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME); - color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); - add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); + // The next two extensions are required to be available together in order to support write masks + color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); + color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -194,6 +195,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceFeatures2{ .features{ .robustBufferAccess = features.robustBufferAccess, + .independentBlend = true, .geometryShader = features.geometryShader, .logicOp = features.logicOp, .samplerAnisotropy = features.samplerAnisotropy, @@ -233,6 +235,7 @@ bool Instance::CreateDevice() { if (!color_write_en) { device_chain.unlink(); + device_chain.unlink(); } try { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d4a38308..1ddfa2fa 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -93,16 +93,26 @@ void PipelineCache::RefreshGraphicsKey() { key.stencil_ref_back = regs.stencil_ref_back; key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); - key.blend_controls = regs.blend_control; const auto& db = regs.depth_buffer; key.depth_format = key.depth.depth_enable ? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) : vk::Format::eUndefined; - for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) { - const auto& cb = regs.color_buffers[i]; - key.color_formats[i] = cb ? 
LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) - : vk::Format::eUndefined; + // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color + // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. + // We need to do some arrays compaction at this stage + int remapped_cb{}; + for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (!col_buf) { + continue; + } + key.color_formats[remapped_cb] = + LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); + key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; + + ++remapped_cb; } for (u32 i = 0; i < MaxShaderStages; i++) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fa973ed8..d0b873fa 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -68,7 +68,7 @@ void Rasterizer::Draw(bool is_indexed) { .pColorAttachments = color_attachments.data(), }; - UpdateDynamicState(); + UpdateDynamicState(*pipeline); cmdbuf.beginRendering(rendering_info); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); @@ -132,7 +132,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { return regs.num_indices; } -void Rasterizer::UpdateDynamicState() { +void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { UpdateViewportScissorState(); auto& regs = liverpool->regs; @@ -140,16 +140,13 @@ void Rasterizer::UpdateDynamicState() { cmdbuf.setBlendConstants(&regs.blend_constants.red); if (instance.IsColorWriteEnableSupported()) { - std::array write_en{}; - std::array write_mask{}; - for (int col_buf_idx = 0; col_buf_idx < Liverpool::NumColorBuffers; ++col_buf_idx) { - const auto mask = regs.color_target_mask.raw >> (col_buf_idx * 
4); - write_en[col_buf_idx] = mask ? vk::True : vk::False; - write_mask[col_buf_idx] = vk::ColorComponentFlags{mask}; - } + const auto& write_masks = pipeline.GetWriteMasks(); + std::array write_ens{}; + std::transform(write_masks.cbegin(), write_masks.cend(), write_ens.begin(), + [](auto in) { return in ? vk::True : vk::False; }); - cmdbuf.setColorWriteEnableEXT(write_en); - cmdbuf.setColorWriteMaskEXT(0, write_mask); + cmdbuf.setColorWriteEnableEXT(write_ens); + cmdbuf.setColorWriteMaskEXT(0, write_masks); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2678a480..a1b6a5a6 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -37,7 +37,7 @@ private: u32 SetupIndexBuffer(bool& is_indexed); void MapMemory(VAddr addr, size_t size); - void UpdateDynamicState(); + void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); void UpdateDepthStencilState();