diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 1e9925fc..6136b46a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -321,9 +321,13 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MAX_F32: translator.V_MAX_F32(inst); break; + case Opcode::V_RSQ_F32: + translator.V_RSQ_F32(inst); + break; case Opcode::S_ANDN2_B64: translator.S_ANDN2_B64(inst); break; + case Opcode::S_NOP: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: case Opcode::S_MOV_B64: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 8a027e9f..6e50e8fb 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -67,6 +67,7 @@ public: void V_FMA_F32(const GcnInst& inst); void V_CMP_F32(ConditionOp op, const GcnInst& inst); void V_MAX_F32(const GcnInst& inst); + void V_RSQ_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7bb97f01..44394013 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -193,4 +193,9 @@ void Translator::V_MAX_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(src0, src1)); } +void Translator::V_RSQ_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); +} + } // namespace Shader::Gcn diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index ac507667..9c2b4bcd 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -416,6 
+416,10 @@ struct Liverpool { BitField<20, 4, u32> output5_mask; BitField<24, 4, u32> output6_mask; BitField<28, 4, u32> output7_mask; + + [[nodiscard]] u8 GetMask(int buf_id) const { + return (raw >> (buf_id * 4)) & 0xfu; // each color buffer owns one 4-bit mask + } }; struct IndexBufferBase { @@ -631,6 +635,10 @@ struct Liverpool { u32 clear_word1; INSERT_PADDING_WORDS(2); + operator bool() const { + return info.format != DataFormat::FormatInvalid; + } + u32 Pitch() const { return (pitch.tile_max + 1) << 3; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4b65fe07..655dc692 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -113,6 +113,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul vk::DynamicState::eBlendConstants, }; + if (instance.IsColorWriteEnableSupported()) { + dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT); + dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT); + } + const vk::PipelineDynamicStateCreateInfo dynamic_info = { .dynamicStateCount = static_cast(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), @@ -144,18 +149,21 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }, }; - u32 shader_count = 2; + u32 shader_count = 1; std::array shader_stages; shader_stages[0] = vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, .module = modules[0], .pName = "main", }; - shader_stages[1] = vk::PipelineShaderStageCreateInfo{ - .stage = vk::ShaderStageFlagBits::eFragment, - .module = modules[4], - .pName = "main", - }; + if (modules[4]) { + shader_stages[1] = vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = modules[4], + .pName = "main", + }; + ++shader_count; + } const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); const u32 
num_color_formats = std::distance(key.color_formats.begin(), it); @@ -177,8 +185,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .srcAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.alpha_src_factor), .dstAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor), .alphaBlendOp = LiverpoolToVK::BlendOp(control.alpha_func), - .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + .colorWriteMask = + instance.IsColorWriteEnableSupported() + ? vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + : key.write_masks[i], }; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index c7e773ad..17ed225b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -39,6 +39,7 @@ struct GraphicsPipelineKey { Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; std::array blend_controls; + std::array write_masks; bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0; @@ -66,6 +67,10 @@ public: return key.stage_hashes[0] == EmbeddedVsHash; } + [[nodiscard]] auto GetWriteMasks() const { + return key.write_masks; + } + private: void BuildDescSetLayout(); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d35d35d1..3cfe8c79 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -115,6 +115,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, 
vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT, + vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>(); const vk::StructureChain properties_chain = physical_device.getProperties2(); } + if (!color_write_en) { + device_chain.unlink(); + device_chain.unlink(); + } + try { device = physical_device.createDeviceUnique(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 5e1be6ce..e4ee9aa4 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -111,6 +111,11 @@ public: return external_memory_host; } + /// Returns true when VK_EXT_color_write_enable is supported + bool IsColorWriteEnableSupported() const { + return color_write_en; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -218,6 +223,7 @@ private: bool fragment_shader_barycentric{}; bool shader_stencil_export{}; bool external_memory_host{}; + bool color_write_en{}; u64 min_imported_host_pointer_alignment{}; bool tooling_info{}; bool debug_utils_supported{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9b4e6856..1ddfa2fa 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -93,17 +93,30 @@ void PipelineCache::RefreshGraphicsKey() { key.stencil_ref_back = regs.stencil_ref_back; key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); - key.blend_controls = regs.blend_control; const auto& db = regs.depth_buffer; key.depth_format = key.depth.depth_enable ? 
LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) : vk::Format::eUndefined; - for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) { - const auto& cb = regs.color_buffers[i]; - key.color_formats[i] = cb.base_address - ? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) - : vk::Format::eUndefined; + // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color + // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. + // We need to do some arrays compaction at this stage + // Reset all slots so entries past the compacted range never hold leftover values + key.color_formats.fill(vk::Format::eUndefined); + key.blend_controls.fill({}); + key.write_masks.fill({}); + int remapped_cb{}; + for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (!col_buf) { + continue; + } + key.color_formats[remapped_cb] = + LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); + key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; + + ++remapped_cb; } for (u32 i = 0; i < MaxShaderStages; i++) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ded491c4..d0b873fa 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -39,14 +39,21 @@ void Rasterizer::Draw(bool is_indexed) { const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); pipeline->BindResources(memory, vertex_index_buffer, texture_cache); - const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + boost::container::static_vector + color_attachments{}; + for (const auto& col_buf : regs.color_buffers) { + if (!col_buf) { + continue; + } + const auto& image_view = texture_cache.RenderTarget(col_buf); - const vk::RenderingAttachmentInfo color_info = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eGeneral, - .loadOp = vk::AttachmentLoadOp::eLoad, - .storeOp = 
vk::AttachmentStoreOp::eStore, - }; + color_attachments.push_back({ + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eGeneral, + .loadOp = vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + }); + } // TODO: Don't restart renderpass every draw const auto& scissor = regs.screen_scissor; @@ -57,11 +64,11 @@ void Rasterizer::Draw(bool is_indexed) { .extent = {scissor.GetWidth(), scissor.GetHeight()}, }, .layerCount = 1, - .colorAttachmentCount = 1, - .pColorAttachments = &color_info, + .colorAttachmentCount = static_cast(color_attachments.size()), + .pColorAttachments = color_attachments.data(), }; - UpdateDynamicState(); + UpdateDynamicState(*pipeline); cmdbuf.beginRendering(rendering_info); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); @@ -125,12 +132,22 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { return regs.num_indices; } -void Rasterizer::UpdateDynamicState() { +void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { UpdateViewportScissorState(); auto& regs = liverpool->regs; const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.setBlendConstants(&regs.blend_constants.red); + + if (instance.IsColorWriteEnableSupported()) { + const auto& write_masks = pipeline.GetWriteMasks(); + std::array write_ens{}; + std::transform(write_masks.cbegin(), write_masks.cend(), write_ens.begin(), + [](auto in) { return in ? 
vk::True : vk::False; }); + + cmdbuf.setColorWriteEnableEXT(write_ens); + cmdbuf.setColorWriteMaskEXT(0, write_masks); + } } void Rasterizer::UpdateViewportScissorState() { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2678a480..a1b6a5a6 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -37,7 +37,7 @@ private: u32 SetupIndexBuffer(bool& is_indexed); void MapMemory(VAddr addr, size_t size); - void UpdateDynamicState(); + void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); void UpdateDepthStencilState(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 658cfa41..e42a0bbd 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -15,6 +15,7 @@ #define PAGE_NOACCESS PROT_NONE #define PAGE_READWRITE (PROT_READ | PROT_WRITE) +#define PAGE_READONLY PROT_READ #else #include