From 38080b60aff362f1bf980d6d0df2fc587da0cd1c Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 6 Jul 2024 02:42:16 +0300 Subject: [PATCH] shader_recompiler: Check usage before enabling capabilities (#245) * vk_instance: Better feature check * shader_recompiler: Make most features optional * vk_instance: Bump extension vector size * resource_tracking_pass: Perform BFS for sharp tracking * The Witness triggered this --- src/core/libraries/gnmdriver/gnmdriver.cpp | 2 +- .../backend/spirv/emit_spirv.cpp | 26 +++++++--- .../backend/spirv/spirv_emit_context.cpp | 16 +++--- .../frontend/translate/translate.cpp | 1 + .../ir/breadth_first_search.h | 52 +++++++++++++++++++ .../ir/passes/resource_tracking_pass.cpp | 25 ++++----- .../ir/passes/shader_info_collection_pass.cpp | 18 +++++++ src/shader_recompiler/runtime_info.h | 5 ++ src/video_core/amdgpu/liverpool.h | 11 ++-- .../renderer_vulkan/liverpool_to_vk.cpp | 7 +++ .../renderer_vulkan/vk_instance.cpp | 48 +++++++++-------- src/video_core/renderer_vulkan/vk_instance.h | 1 + 12 files changed, 151 insertions(+), 61 deletions(-) create mode 100644 src/shader_recompiler/ir/breadth_first_search.h diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 3fc79abe..b2c5b752 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1029,7 +1029,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke if (cmdbuf && marker) { const auto len = std::strlen(marker); - const u32 packet_size = ((len + 8) >> 2) + ((len + 0xc) >> 3); + const u32 packet_size = ((len + 8) >> 2) + ((len + 0xc) >> 3) * 2; if (packet_size + 2 == size) { auto* nop = reinterpret_cast(cmdbuf); nop->header = diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index b1a0845f..561014a3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -174,14 +174,18 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { } void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { + const auto& info = program.info; const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); - ctx.AddCapability(spv::Capability::Float16); - ctx.AddCapability(spv::Capability::Int16); - ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); - ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); + if (info.uses_fp16) { + ctx.AddCapability(spv::Capability::Float16); + ctx.AddCapability(spv::Capability::Int16); + } + if (info.has_storage_images) { + ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); + } switch (program.info.stage) { case Stage::Compute: { const std::array workgroup_size{program.info.workgroup_size}; @@ -200,13 +204,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { } else { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); } - if (program.info.uses_group_quad) { + if (info.uses_group_quad) { ctx.AddCapability(spv::Capability::GroupNonUniform); ctx.AddCapability(spv::Capability::GroupNonUniformQuad); } - ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); - ctx.AddCapability(spv::Capability::ImageGatherExtended); - ctx.AddCapability(spv::Capability::ImageQuery); + if (info.has_discard) { + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } + if (info.has_image_gather) { + ctx.AddCapability(spv::Capability::ImageGatherExtended); + } + if (info.has_image_query) { + ctx.AddCapability(spv::Capability::ImageQuery); + } // if (program.info.stores_frag_depth) { // ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); // } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index b4a67ebf..6d9b2547 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -74,21 +74,19 @@ Id EmitContext::Def(const IR::Value& value) { void EmitContext::DefineArithmeticTypes() { void_id = Name(TypeVoid(), "void_id"); U1[1] = Name(TypeBool(), "bool_id"); - F16[1] = Name(TypeFloat(16), "f16_id"); + if (info.uses_fp16) { + F16[1] = Name(TypeFloat(16), "f16_id"); + U16 = Name(TypeUInt(16), "u16_id"); + } F32[1] = Name(TypeFloat(32), "f32_id"); - // F64[1] = Name(TypeFloat(64), "f64_id"); S32[1] = Name(TypeSInt(32), "i32_id"); U32[1] = Name(TypeUInt(32), "u32_id"); - // U8 = Name(TypeSInt(8), "u8"); - // S8 = Name(TypeUInt(8), "s8"); - U16 = Name(TypeUInt(16), "u16_id"); - // S16 = Name(TypeSInt(16), "s16_id"); - // U64 = Name(TypeUInt(64), "u64_id"); for (u32 i = 2; i <= 4; i++) { - F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i)); + if (info.uses_fp16) { + F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i)); + } F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i)); - // F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i)); S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i)); U32[i] = Name(TypeVector(U32[1], i), fmt::format("u32vec{}_id", i)); U1[i] = Name(TypeVector(U1[1], i), fmt::format("bvec{}_id", i)); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 3d0857c0..cb6d16c3 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -396,6 +396,7 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::IMAGE_SAMPLE_L: case Opcode::IMAGE_SAMPLE_C_O: case Opcode::IMAGE_SAMPLE_B: + case Opcode::IMAGE_SAMPLE_C_LZ_O: translator.IMAGE_SAMPLE(inst); break; case Opcode::IMAGE_ATOMIC_ADD: diff --git a/src/shader_recompiler/ir/breadth_first_search.h b/src/shader_recompiler/ir/breadth_first_search.h new file mode 100644 index 00000000..21a34a90 --- /dev/null +++ b/src/shader_recompiler/ir/breadth_first_search.h @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { + +template +auto BreadthFirstSearch(const Value& value, Pred&& pred) + -> std::invoke_result_t { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + // Breadth-first search visiting the right most arguments first + boost::container::small_vector visited; + std::queue queue; + queue.push(value.InstRecursive()); + + while (!queue.empty()) { + // Pop one instruction from the queue + const Inst* const inst{queue.front()}; + queue.pop(); + if (const std::optional result = pred(inst)) { + // This is the instruction we were looking for + return result; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + const Value arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { + continue; + } + // Queue instruction if it hasn't been visited + const Inst* const arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + // SSA tree has been traversed and the result hasn't been found + return std::nullopt; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 4382bff3..b7d6a722 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -4,8 +4,8 @@ #include #include #include - #include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/runtime_info.h" @@ -244,22 +244,19 @@ SharpLocation TrackSharp(const IR::Inst* inst) { const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive(); // Retrieve SGPR pair that holds sbase - const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive(); - const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive(); - while (sbase0->GetOpcode() == IR::Opcode::Phi) { - sbase0 = sbase0->Arg(0).TryInstRecursive(); - } - while (sbase1->GetOpcode() == IR::Opcode::Phi) { - sbase1 = sbase1->Arg(0).TryInstRecursive(); - } - ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData && - sbase1->GetOpcode() == IR::Opcode::GetUserData, - "Nested resource loads not supported"); - const IR::ScalarReg base = sbase0->Arg(0).ScalarReg(); + const auto pred = [](const IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetUserData) { + return inst->Arg(0).ScalarReg(); + } + return std::nullopt; + }; + const auto base0 = IR::BreadthFirstSearch(spgpr_base->Arg(0), pred); + const auto base1 = IR::BreadthFirstSearch(spgpr_base->Arg(1), pred); + ASSERT_MSG(base0 && base1, "Nested resource loads not supported"); // Return retrieved location. return SharpLocation{ - .sgpr_base = u32(base), + .sgpr_base = u32(base0.value()), .dword_offset = dword_offset, }; } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 25d8b937..1cec237f 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -26,9 +26,27 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::WriteSharedU16: info.uses_shared_u16 = true; break; + case IR::Opcode::ConvertF32F16: + case IR::Opcode::BitCastF16U16: + info.uses_fp16 = true; + break; + case IR::Opcode::ImageWrite: + info.has_storage_images = true; + break; case IR::Opcode::QuadShuffle: info.uses_group_quad = true; break; + case IR::Opcode::Discard: + info.has_discard = true; + break; + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: + info.has_image_gather = true; + break; + case IR::Opcode::ImageQueryDimensions: + case IR::Opcode::ImageQueryLod: + info.has_image_query = true; + break; default: break; } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 993207eb..66d32d4d 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -169,9 +169,14 @@ struct Info { uintptr_t pgm_base{}; u64 pgm_hash{}; u32 shared_memory_size{}; + bool has_storage_images{}; + bool has_discard{}; + bool has_image_gather{}; + bool has_image_query{}; bool uses_group_quad{}; bool uses_shared_u8{}; bool uses_shared_u16{}; + bool uses_fp16{}; bool translation_failed{}; // indicates that shader has unsupported instructions template diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 6e19f55d..d18482f6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -3,12 +3,6 @@ #pragma once -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/types.h" -#include "resource.h" -#include "video_core/amdgpu/pixel_format.h" - #include #include #include @@ -16,6 +10,11 @@ #include #include #include +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/types.h" +#include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/resource.h" namespace Vulkan { class Rasterizer; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 2509467f..dca7ff3d 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -422,6 +422,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR16G16Sint; } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Uscaled) { + return vk::Format::eR8G8B8A8Uscaled; + } + if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR16Unorm; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index c071cc2f..b9f5bce3 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -109,16 +109,14 @@ std::string Instance::GetDriverVersionName() { bool Instance::CreateDevice() { const vk::StructureChain feature_chain = physical_device.getFeatures2< - vk::PhysicalDeviceFeatures2, vk::PhysicalDevicePortabilitySubsetFeaturesKHR, - vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, + vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT, - vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR, - vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, - vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, - vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT, - vk::PhysicalDeviceColorWriteEnableFeaturesEXT, - vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>(); + vk::PhysicalDeviceCustomBorderColorFeaturesEXT, + vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features, + vk::PhysicalDeviceVulkan13Features, + vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR, + vk::PhysicalDeviceDepthClipControlFeaturesEXT>(); const vk::StructureChain properties_chain = physical_device.getProperties2 enabled_extensions; + boost::container::static_vector enabled_extensions; const auto add_extension = [&](std::string_view extension) -> bool { const auto result = std::find_if(available_extensions.begin(), available_extensions.end(), @@ -156,7 +154,8 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); - add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + workgroup_memory_explicit_layout = + add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); @@ -190,6 +189,8 @@ bool Instance::CreateDevice() { .pQueuePriorities = queue_priorities.data(), }; + const auto vk12_features = feature_chain.get(); + const auto vk13_features = feature_chain.get(); vk::StructureChain device_chain = { vk::DeviceCreateInfo{ .queueCreateInfoCount = 1u, @@ -200,32 +201,33 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceFeatures2{ .features{ .robustBufferAccess = features.robustBufferAccess, - .independentBlend = true, + .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, .logicOp = features.logicOp, - .multiViewport = true, + .multiViewport = features.multiViewport, .samplerAnisotropy = features.samplerAnisotropy, .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics, - .shaderImageGatherExtended = true, - .shaderStorageImageMultisample = true, + .shaderImageGatherExtended = features.shaderImageGatherExtended, + .shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats, + .shaderStorageImageMultisample = features.shaderStorageImageMultisample, .shaderClipDistance = features.shaderClipDistance, - .shaderInt16 = true, + .shaderInt16 = features.shaderInt16, }, }, vk::PhysicalDeviceVulkan11Features{ .shaderDrawParameters = true, }, vk::PhysicalDeviceVulkan12Features{ - .shaderFloat16 = true, - .scalarBlockLayout = true, - .uniformBufferStandardLayout = true, - .hostQueryReset = true, - .timelineSemaphore = true, + .shaderFloat16 = vk12_features.shaderFloat16, + .scalarBlockLayout = vk12_features.scalarBlockLayout, + .uniformBufferStandardLayout = vk12_features.uniformBufferStandardLayout, + .hostQueryReset = vk12_features.hostQueryReset, + .timelineSemaphore = vk12_features.timelineSemaphore, }, vk::PhysicalDeviceVulkan13Features{ - .shaderDemoteToHelperInvocation = true, - .dynamicRendering = true, - .maintenance4 = true, + .shaderDemoteToHelperInvocation = vk13_features.shaderDemoteToHelperInvocation, + .dynamicRendering = vk13_features.dynamicRendering, + .maintenance4 = vk13_features.maintenance4, }, vk::PhysicalDeviceCustomBorderColorFeaturesEXT{ .customBorderColors = true, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index ad6196ab..cc2660d5 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -231,6 +231,7 @@ private: bool fragment_shader_barycentric{}; bool shader_stencil_export{}; bool external_memory_host{}; + bool workgroup_memory_explicit_layout{}; bool color_write_en{}; u64 min_imported_host_pointer_alignment{}; bool tooling_info{};