diff --git a/CMakeLists.txt b/CMakeLists.txt index dfc6528d..d2d1c170 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -521,6 +521,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_resource_pool.h src/video_core/renderer_vulkan/vk_scheduler.cpp src/video_core/renderer_vulkan/vk_scheduler.h + src/video_core/renderer_vulkan/vk_shader_cache.cpp + src/video_core/renderer_vulkan/vk_shader_cache.h src/video_core/renderer_vulkan/vk_shader_util.cpp src/video_core/renderer_vulkan/vk_shader_util.h src/video_core/renderer_vulkan/vk_swapchain.cpp diff --git a/src/core/libraries/avplayer/avplayer.cpp b/src/core/libraries/avplayer/avplayer.cpp index 87a0e49e..23e1e987 100644 --- a/src/core/libraries/avplayer/avplayer.cpp +++ b/src/core/libraries/avplayer/avplayer.cpp @@ -120,7 +120,6 @@ bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle, } SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) { - return nullptr; LOG_TRACE(Lib_AvPlayer, "called"); if (data == nullptr) { return nullptr; diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 455486b0..605d0d29 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -1066,16 +1066,7 @@ ScePthread PThreadPool::Create() { } } -#ifdef _WIN64 auto* ret = new PthreadInternal{}; -#else - // TODO: Linux specific hack - static u8* hint_address = reinterpret_cast(0x7FFFFC000ULL); - auto* ret = reinterpret_cast( - mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)); - hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB); -#endif ret->is_free = false; ret->is_detached = false; ret->is_almost_done = false; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 1ba11bfb..161d4ec9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -201,6 +201,12 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (info.has_image_query) { ctx.AddCapability(spv::Capability::ImageQuery); } + if (info.uses_lane_id) { + ctx.AddCapability(spv::Capability::GroupNonUniform); + } + if (info.uses_group_quad) { + ctx.AddCapability(spv::Capability::GroupNonUniformQuad); + } switch (program.info.stage) { case Stage::Compute: { const std::array workgroup_size{program.info.workgroup_size}; @@ -219,10 +225,6 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { } else { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); } - if (info.uses_group_quad) { - ctx.AddCapability(spv::Capability::GroupNonUniform); - ctx.AddCapability(spv::Capability::GroupNonUniformQuad); - } if (info.has_discard) { ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index b45a5aa3..e4019604 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -132,6 +132,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { case AmdGpu::NumberFormat::SnormNz: case AmdGpu::NumberFormat::Sscaled: case AmdGpu::NumberFormat::Uscaled: + case AmdGpu::NumberFormat::Srgb: return ctx.F32; case AmdGpu::NumberFormat::Sint: return ctx.S32; @@ -140,7 +141,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { default: break; } - throw InvalidArgument("Invalid attribute type {}", fmt); + UNREACHABLE_MSG("Invalid attribute type {}", fmt); } EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) { @@ -161,7 +162,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f default: break; } - throw InvalidArgument("Invalid attribute type {}", fmt); + UNREACHABLE_MSG("Invalid attribute type {}", fmt); } void EmitContext::DefineBufferOffsets() { @@ -204,6 +205,11 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { } void EmitContext::DefineInputs() { + if (info.uses_lane_id) { + subgroup_local_invocation_id = DefineVariable( + U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); + Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); + } switch (stage) { case Stage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); @@ -238,9 +244,6 @@ void EmitContext::DefineInputs() { break; } case Stage::Fragment: - subgroup_local_invocation_id = DefineVariable( - U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); - Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); @@ -354,12 +357,12 @@ void EmitContext::DefineBuffers() { }; for (const auto& desc : info.buffers) { - const auto sharp = desc.GetVsharp(info); + const auto sharp = desc.GetSharp(info); const bool is_storage = desc.IsStorage(sharp); const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; const Id record_array_type{is_storage ? TypeRuntimeArray(data_type) - : TypeArray(data_type, ConstU32(desc.length))}; + : TypeArray(data_type, ConstU32(sharp.NumDwords()))}; const Id struct_type{define_struct(record_array_type, desc.is_instance_data)}; const auto storage_class = @@ -369,6 +372,9 @@ void EmitContext::DefineBuffers() { const Id id{AddGlobalVariable(struct_pointer_type, storage_class)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); + if (is_storage && !desc.is_written) { + Decorate(id, spv::Decoration::NonWritable); + } Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base)); buffers.push_back({ @@ -503,17 +509,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { void EmitContext::DefineImagesAndSamplers() { for (const auto& image_desc : info.images) { - const VectorIds* data_types = [&] { - switch (image_desc.nfmt) { - case AmdGpu::NumberFormat::Uint: - return &U32; - case AmdGpu::NumberFormat::Sint: - return &S32; - default: - return &F32; - } - }(); - const Id sampled_type = data_types->Get(1); + const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt); + const Id sampled_type = data_types[1]; const Id image_type{ImageType(*this, image_desc, sampled_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; @@ -522,7 +519,7 @@ void EmitContext::DefineImagesAndSamplers() { Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base, image_desc.dword_offset)); images.push_back({ - .data_types = data_types, + .data_types = &data_types, .id = id, .sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type), .pointer_type = pointer_type, @@ -531,13 +528,12 @@ void EmitContext::DefineImagesAndSamplers() { interfaces.push_back(id); ++binding; } - - image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); - + if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) { + image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + } if (info.samplers.empty()) { return; } - sampler_type = TypeSampler(); sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type); for (const auto& samp_desc : info.samplers) { @@ -553,7 +549,7 @@ void EmitContext::DefineImagesAndSamplers() { } void EmitContext::DefineSharedMemory() { - static constexpr size_t DefaultSharedMemSize = 16_KB; + static constexpr size_t DefaultSharedMemSize = 2_KB; if (!info.uses_shared) { return; } diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 889de21b..d80de002 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -1,14 +1,12 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/logging/log.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { void Translator::EmitExport(const GcnInst& inst) { if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) { - LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation"); ir.Discard(ir.LogicalNot(ir.GetExec())); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 2cf18690..e3482546 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -399,7 +399,6 @@ void Translator::EmitFetch(const GcnInst& inst) { info.buffers.push_back({ .sgpr_base = attrib.sgpr_base, .dword_offset = attrib.dword_offset, - .length = buffer.num_records, .used_types = IR::Type::F32, .is_instance_data = true, }); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 274dcff1..13a8342d 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -415,14 +415,20 @@ void Translator::V_ADDC_U32(const GcnInst& inst) { const auto src0 = GetSrc(inst.src[0]); const auto src1 = GetSrc(inst.src[1]); - IR::U32 scarry; + IR::U1 carry; if (inst.src_count == 3) { // VOP3 - IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))}; - scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))}; + if (inst.src[2].field == OperandField::VccLo) { + carry = ir.GetVcc(); + } else if (inst.src[2].field == OperandField::ScalarGPR) { + carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code)); + } else { + UNREACHABLE(); + } } else { // VOP2 - scarry = ir.GetVccLo(); + carry = ir.GetVcc(); } + const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))}; const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry); const IR::VectorReg dst_reg{inst.dst[0].code}; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 953273b2..f446ac47 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -3,6 +3,7 @@ #include #include +#include "common/alignment.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" @@ -195,7 +196,6 @@ public: desc.inline_cbuf == existing.inline_cbuf; })}; auto& buffer = buffer_resources[index]; - ASSERT(buffer.length == desc.length); buffer.used_types |= desc.used_types; buffer.is_written |= desc.is_written; return index; @@ -227,7 +227,7 @@ public: return true; } // Samplers with different bindings might still be the same. - return existing.GetSsharp(info) == desc.GetSsharp(info); + return existing.GetSharp(info) == desc.GetSharp(info); })}; return index; } @@ -342,19 +342,6 @@ SharpLocation TrackSharp(const IR::Inst* inst) { }; } -static u32 BufferLength(const AmdGpu::Buffer& buffer) { - const auto stride = buffer.GetStride(); - if (stride < sizeof(f32)) { - ASSERT(sizeof(f32) % stride == 0); - return (((buffer.num_records - 1) / sizeof(f32)) + 1) * stride; - } else if (stride == sizeof(f32)) { - return buffer.num_records; - } else { - ASSERT(stride % sizeof(f32) == 0); - return buffer.num_records * (stride / sizeof(f32)); - } -} - s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, AmdGpu::Buffer& cbuf) { @@ -381,7 +368,6 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, return descriptors.Add(BufferResource{ .sgpr_base = std::numeric_limits::max(), .dword_offset = 0, - .length = BufferLength(cbuf), .used_types = BufferDataType(inst, cbuf.GetNumberFmt()), .inline_cbuf = cbuf, }); @@ -399,7 +385,6 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, binding = descriptors.Add(BufferResource{ .sgpr_base = sharp.sgpr_base, .dword_offset = sharp.dword_offset, - .length = BufferLength(buffer), .used_types = BufferDataType(inst, buffer.GetNumberFmt()), .is_written = IsBufferStore(inst), }); diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index c61494a6..7105f01f 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -50,6 +50,9 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryLod: info.has_image_query = true; break; + case IR::Opcode::LaneId: + info.uses_lane_id = true; + break; default: break; } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index b185005a..77c57e94 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include "common/assert.h" #include "common/types.h" @@ -12,10 +13,6 @@ #include "shader_recompiler/ir/type.h" #include "video_core/amdgpu/resource.h" -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); -} - namespace Shader { static constexpr size_t NumUserDataRegs = 16; @@ -78,31 +75,19 @@ struct Info; struct BufferResource { u32 sgpr_base; u32 dword_offset; - u32 length; IR::Type used_types; AmdGpu::Buffer inline_cbuf; bool is_instance_data{}; bool is_written{}; - static constexpr size_t MaxUboSize = 65536; - bool IsStorage(AmdGpu::Buffer buffer) const noexcept { + static constexpr size_t MaxUboSize = 65536; return buffer.GetSize() > MaxUboSize || is_written; } - u64 GetKey(const Info& info) const { - const auto sharp = GetVsharp(info); - u64 key = sharp.GetStride(); - if (!is_written) { - key <<= 1; - key |= IsStorage(sharp); - } - return key; - } - - constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept; + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; }; -using BufferResourceList = boost::container::static_vector; +using BufferResourceList = boost::container::small_vector; struct TextureBufferResource { u32 sgpr_base; @@ -110,16 +95,9 @@ struct TextureBufferResource { AmdGpu::NumberFormat nfmt; bool is_written{}; - u64 GetKey(const Info& info) const { - const auto sharp = GetVsharp(info); - const bool is_integer = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Uint || - sharp.GetNumberFmt() == AmdGpu::NumberFormat::Sint; - return is_integer; - } - - constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept; + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; }; -using TextureBufferResourceList = boost::container::static_vector; +using TextureBufferResourceList = boost::container::small_vector; struct ImageResource { u32 sgpr_base; @@ -130,14 +108,9 @@ struct ImageResource { bool is_depth; bool is_atomic{}; - u64 GetKey(const Info& info) const { - const auto sharp = GetTsharp(info); - return sharp.type; - } - - constexpr AmdGpu::Image GetTsharp(const Info& info) const noexcept; + constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; -using ImageResourceList = boost::container::static_vector; +using ImageResourceList = boost::container::small_vector; struct SamplerResource { u32 sgpr_base; @@ -146,9 +119,9 @@ struct SamplerResource { u32 associated_image : 4; u32 disable_aniso : 1; - constexpr AmdGpu::Sampler GetSsharp(const Info& info) const noexcept; + constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; }; -using SamplerResourceList = boost::container::static_vector; +using SamplerResourceList = boost::container::small_vector; struct PushData { static constexpr size_t BufOffsetIndex = 2; @@ -242,9 +215,10 @@ struct Info { bool has_discard{}; bool has_image_gather{}; bool has_image_query{}; + bool uses_lane_id{}; bool uses_group_quad{}; bool uses_shared{}; - bool uses_fp16{true}; + bool uses_fp16{}; bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions @@ -263,20 +237,6 @@ struct Info { return buffers.size() + texture_buffers.size() + images.size() + samplers.size(); } - u64 GetStageSpecializedKey(u32 binding = 0) const noexcept { - u64 key = HashCombine(pgm_hash, binding); - for (const auto& buffer : buffers) { - key = HashCombine(key, buffer.GetKey(*this)); - } - for (const auto& buffer : texture_buffers) { - key = HashCombine(key, buffer.GetKey(*this)); - } - for (const auto& image : images) { - key = HashCombine(key, image.GetKey(*this)); - } - return key; - } - [[nodiscard]] std::pair GetDrawOffsets() const noexcept { u32 vertex_offset = 0; u32 instance_offset = 0; @@ -290,19 +250,19 @@ struct Info { } }; -constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept { +constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); } -constexpr AmdGpu::Buffer TextureBufferResource::GetVsharp(const Info& info) const noexcept { +constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept { return info.ReadUd(sgpr_base, dword_offset); } -constexpr AmdGpu::Image ImageResource::GetTsharp(const Info& info) const noexcept { +constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { return info.ReadUd(sgpr_base, dword_offset); } -constexpr AmdGpu::Sampler SamplerResource::GetSsharp(const Info& info) const noexcept { +constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { return inline_sampler ? inline_sampler : info.ReadUd(sgpr_base, dword_offset); } diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 1004ed7d..53d30a7f 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -61,6 +61,10 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; +[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) { + return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint; +} + [[nodiscard]] std::string_view NameOf(DataFormat fmt); [[nodiscard]] std::string_view NameOf(NumberFormat fmt); diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 8c3b675e..b85a3788 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -3,6 +3,7 @@ #pragma once +#include "common/alignment.h" #include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" @@ -68,6 +69,10 @@ struct Buffer { return stride == 0 ? 1U : stride; } + u32 NumDwords() const noexcept { + return Common::AlignUp(GetSize(), sizeof(u32)) >> 2; + } + u32 GetSize() const noexcept { return GetStride() * num_records; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 71cc9815..e62c1040 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -24,7 +24,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler u32 binding{}; boost::container::small_vector bindings; for (const auto& buffer : info->buffers) { - const auto sharp = buffer.GetVsharp(*info); + const auto sharp = buffer.GetSharp(*info); bindings.push_back({ .binding = binding++, .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer @@ -107,17 +107,17 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, Shader::PushData push_data{}; u32 binding{}; - for (const auto& buffer : info->buffers) { - const auto vsharp = buffer.GetVsharp(*info); - const bool is_storage = buffer.IsStorage(vsharp); + for (const auto& desc : info->buffers) { + const auto vsharp = desc.GetSharp(*info); + const bool is_storage = desc.IsStorage(vsharp); const VAddr address = vsharp.base_address; // Most of the time when a metadata is updated with a shader it gets cleared. It means we // can skip the whole dispatch and update the tracked state instead. Also, it is not // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will // need its full emulation anyways. For cases of metadata read a warning will be logged. - if (is_storage) { + if (desc.is_written) { if (texture_cache.TouchMeta(address, true)) { - LOG_WARNING(Render_Vulkan, "Metadata update skipped"); + LOG_TRACE(Render_Vulkan, "Metadata update skipped"); return false; } } else { @@ -126,13 +126,12 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, } } const u32 size = vsharp.GetSize(); - if (buffer.is_written) { + if (desc.is_written) { texture_cache.InvalidateMemory(address, size, true); } const u32 alignment = is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(address, size, buffer.is_written); + const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, desc.is_written); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; if (adjust != 0) { @@ -151,18 +150,28 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, }); } - for (const auto& tex_buffer : info->texture_buffers) { - const auto vsharp = tex_buffer.GetVsharp(*info); + for (const auto& desc : info->texture_buffers) { + const auto vsharp = desc.GetSharp(*info); vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE); if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { const VAddr address = vsharp.base_address; const u32 size = vsharp.GetSize(); - if (tex_buffer.is_written) { + if (desc.is_written) { + if (texture_cache.TouchMeta(address, true)) { + LOG_TRACE(Render_Vulkan, "Metadata update skipped"); + return false; + } + } else { + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + } + } + if (desc.is_written) { texture_cache.InvalidateMemory(address, size, true); } const u32 alignment = instance.TexelBufferMinAlignment(); const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written); + buffer_cache.ObtainBuffer(address, size, desc.is_written); const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; ASSERT_MSG(fmt_stride == vsharp.GetStride(), "Texel buffer stride must match format stride"); @@ -172,7 +181,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, ASSERT(adjust % fmt_stride == 0); push_data.AddOffset(binding, adjust / fmt_stride); } - buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written, + buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt()); } set_writes.push_back({ @@ -180,19 +189,23 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, .dstBinding = binding++, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer - : vk::DescriptorType::eUniformTexelBuffer, + .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer + : vk::DescriptorType::eUniformTexelBuffer, .pTexelBufferView = &buffer_view, }); } for (const auto& image_desc : info->images) { - const auto tsharp = image_desc.GetTsharp(*info); - VideoCore::ImageInfo image_info{tsharp}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - const auto& image_view = texture_cache.FindTexture(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); + const auto tsharp = image_desc.GetSharp(*info); + if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + VideoCore::ImageInfo image_info{tsharp}; + VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; + const auto& image_view = texture_cache.FindTexture(image_info, view_info); + const auto& image = texture_cache.GetImage(image_view.image_id); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); + } else { + image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); + } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, .dstBinding = binding++, @@ -208,7 +221,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, } } for (const auto& sampler : info->samplers) { - const auto ssharp = sampler.GetSsharp(*info); + const auto ssharp = sampler.GetSharp(*info); const auto vk_sampler = texture_cache.GetSampler(ssharp); image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f56de831..719a911a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -307,7 +307,7 @@ void GraphicsPipeline::BuildDescSetLayout() { continue; } for (const auto& buffer : stage->buffers) { - const auto sharp = buffer.GetVsharp(*stage); + const auto sharp = buffer.GetSharp(*stage); bindings.push_back({ .binding = binding++, .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer @@ -356,7 +356,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, VideoCore::TextureCache& texture_cache) const { // Bind resource buffers and textures. boost::container::static_vector buffer_views; - boost::container::static_vector buffer_infos; + boost::container::static_vector buffer_infos; boost::container::static_vector image_infos; boost::container::small_vector set_writes; Shader::PushData push_data{}; @@ -371,7 +371,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, push_data.step1 = regs.vgt_instance_step_rate_1; } for (const auto& buffer : stage->buffers) { - const auto vsharp = buffer.GetVsharp(*stage); + const auto vsharp = buffer.GetSharp(*stage); const bool is_storage = buffer.IsStorage(vsharp); if (vsharp) { const VAddr address = vsharp.base_address; @@ -405,7 +405,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } for (const auto& tex_buffer : stage->texture_buffers) { - const auto vsharp = tex_buffer.GetVsharp(*stage); + const auto vsharp = tex_buffer.GetSharp(*stage); vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE); if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { const VAddr address = vsharp.base_address; @@ -438,7 +438,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, boost::container::static_vector tsharps; for (const auto& image_desc : stage->images) { - const auto tsharp = image_desc.GetTsharp(*stage); + const auto tsharp = image_desc.GetSharp(*stage); if (tsharp) { tsharps.emplace_back(tsharp); VideoCore::ImageInfo image_info{tsharp}; @@ -464,7 +464,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } } for (const auto& sampler : stage->samplers) { - auto ssharp = sampler.GetSsharp(*stage); + auto ssharp = sampler.GetSharp(*stage); if (sampler.disable_aniso) { const auto& tsharp = tsharps[sampler.associated_image]; if (tsharp.base_level == 0 && tsharp.last_level == 0) { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 1be331a7..e1a5cb41 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -278,6 +278,7 @@ bool Instance::CreateDevice() { .depthBiasClamp = features.depthBiasClamp, .multiViewport = features.multiViewport, .samplerAnisotropy = features.samplerAnisotropy, + .vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics, .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics, .shaderImageGatherExtended = features.shaderImageGatherExtended, .shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 08c56dde..1a90a584 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,122 +1,26 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/config.h" -#include "common/io_file.h" -#include "common/path_util.h" -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_shader_cache.h" extern std::unique_ptr renderer; namespace Vulkan { -using Shader::VsOutput; - -void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { - const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { - if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || - w != VsOutput::None) { - info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); - } - }; - // VS_OUT_MISC_VEC - add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, - ctl.use_vtx_edge_flag - ? VsOutput::EdgeFlag - : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), - ctl.use_vtx_kill_flag - ? VsOutput::KillFlag - : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), - ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); - // VS_OUT_CCDIST0 - add_output(ctl.IsClipDistEnabled(0) - ? VsOutput::ClipDist0 - : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), - ctl.IsClipDistEnabled(1) - ? VsOutput::ClipDist1 - : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), - ctl.IsClipDistEnabled(2) - ? VsOutput::ClipDist2 - : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), - ctl.IsClipDistEnabled(3) - ? VsOutput::ClipDist3 - : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); - // VS_OUT_CCDIST1 - add_output(ctl.IsClipDistEnabled(4) - ? VsOutput::ClipDist4 - : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), - ctl.IsClipDistEnabled(5) - ? VsOutput::ClipDist5 - : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), - ctl.IsClipDistEnabled(6) - ? VsOutput::ClipDist6 - : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), - ctl.IsClipDistEnabled(7) - ? VsOutput::ClipDist7 - : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); -} - -Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, u64 pgm_base, - u64 hash, const AmdGpu::Liverpool::Regs& regs) { - Shader::Info info{}; - info.user_data = user_data; - info.pgm_base = pgm_base; - info.pgm_hash = hash; - info.stage = stage; - switch (stage) { - case Shader::Stage::Vertex: { - info.num_user_data = regs.vs_program.settings.num_user_regs; - info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; - BuildVsOutputs(info, regs.vs_output_control); - break; - } - case Shader::Stage::Fragment: { - info.num_user_data = regs.ps_program.settings.num_user_regs; - for (u32 i = 0; i < regs.num_interp; i++) { - info.ps_inputs.push_back({ - .param_index = regs.ps_inputs[i].input_offset.Value(), - .is_default = bool(regs.ps_inputs[i].use_default), - .is_flat = bool(regs.ps_inputs[i].flat_shade), - .default_value = regs.ps_inputs[i].default_value, - }); - } - break; - } - case Shader::Stage::Compute: { - const auto& cs_pgm = regs.cs_program; - info.num_user_data = cs_pgm.settings.num_user_regs; - info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, - cs_pgm.num_thread_z.full}; - info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), - cs_pgm.IsTgidEnabled(2)}; - info.shared_memory_size = cs_pgm.SharedMemSize(); - break; - } - default: - break; - } - return info; -} - PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, - block_pool{512} { + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, + shader_cache{std::make_unique(instance, liverpool)} { pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); - profile = Shader::Profile{ - .supported_spirv = 0x00010600U, - .subgroup_size = instance.SubgroupSize(), - .support_explicit_workgroup_layout = true, - }; } +PipelineCache::~PipelineCache() = default; + const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { const auto& regs = liverpool->regs; // Tessellation is unsupported so skip the draw to avoid locking up the driver. @@ -257,7 +161,8 @@ void PipelineCache::RefreshGraphicsKey() { continue; } const auto stage = Shader::Stage{i}; - std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(pgm, stage, binding); + std::tie(infos[i], modules[i], key.stage_hashes[i]) = + shader_cache->GetProgram(pgm, stage, binding); } } @@ -265,47 +170,7 @@ void PipelineCache::RefreshComputeKey() { u32 binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; std::tie(infos[0], modules[0], compute_key) = - GetProgram(cs_pgm, Shader::Stage::Compute, binding); -} - -vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span code, - size_t perm_idx, u32& binding) { - LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, - perm_idx != 0 ? "(permutation)" : ""); - - if (Config::dumpShaders()) { - DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); - } - - block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); - - // Compile IR to SPIR-V - const u64 key = info.GetStageSpecializedKey(binding); - const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); - if (Config::dumpShaders()) { - DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); - } - - // Create module and set name to hash in renderdoc - const auto module = CompileSPV(spv, instance.GetDevice()); - ASSERT(module != VK_NULL_HANDLE); - const auto name = fmt::format("{}_{:#x}_{}", info.stage, key, perm_idx); - Vulkan::SetObjectName(instance.GetDevice(), module, name); - return module; -} - -void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, - size_t perm_idx, std::string_view ext) { - using namespace Common::FS; - const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; - if (!std::filesystem::exists(dump_dir)) { - std::filesystem::create_directories(dump_dir); - } - const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); - const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; - file.WriteSpan(code); + shader_cache->GetProgram(cs_pgm, Shader::Stage::Compute, binding); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4d262a50..d0eb0c66 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -4,9 +4,6 @@ #pragma once #include -#include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/ir/program.h" -#include "shader_recompiler/profile.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -18,15 +15,7 @@ namespace Vulkan { class Instance; class Scheduler; - -struct Program { - using Module = std::pair; - Shader::Info info; - boost::container::small_vector modules; -}; - -Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, u64 pgm_base, - u64 hash, const AmdGpu::Liverpool::Regs& regs); +class ShaderCache; class PipelineCache { static constexpr size_t MaxShaderStages = 5; @@ -34,7 +23,7 @@ class PipelineCache { public: explicit PipelineCache(const Instance& instance, Scheduler& scheduler, AmdGpu::Liverpool* liverpool); - ~PipelineCache() = default; + ~PipelineCache(); const GraphicsPipeline* GetGraphicsPipeline(); @@ -43,54 +32,6 @@ public: private: void RefreshGraphicsKey(); void RefreshComputeKey(); - void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, - std::string_view ext); - - vk::ShaderModule CompileModule(Shader::Info& info, std::span code, size_t perm_idx, - u32& binding); - - std::tuple GetProgram(const auto* pgm, - Shader::Stage stage, - u32& binding) { - // Fetch program for binaryinfo hash. - const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); - const u64 hash = bininfo->shader_hash; - auto [it_pgm, new_program] = program_cache.try_emplace(hash); - u64 stage_key{}; - if (new_program) { - // Create a new program and a module with current runtime state. - const VAddr pgm_base = pgm->template Address(); - auto program = program_pool.Create(); - program->info = MakeShaderInfo(stage, pgm->user_data, pgm_base, hash, liverpool->regs); - u32 start_binding = binding; - const auto module = CompileModule(program->info, pgm->Code(), 0, start_binding); - stage_key = program->info.GetStageSpecializedKey(binding); - program->modules.emplace_back(stage_key, module); - it_pgm.value() = program; - } else { - stage_key = it_pgm->second->info.GetStageSpecializedKey(binding); - } - - Program* program = it_pgm->second; - const auto& info = program->info; - vk::ShaderModule module{}; - - // Compile specialized module with current runtime state. - const auto it = std::ranges::find(program->modules, stage_key, &Program::Module::first); - if (it == program->modules.end()) { - auto new_info = MakeShaderInfo(stage, pgm->user_data, info.pgm_base, info.pgm_hash, - liverpool->regs); - const size_t perm_idx = program->modules.size(); - module = CompileModule(new_info, pgm->Code(), perm_idx, binding); - program->modules.emplace_back(stage_key, module); - } else { - binding += info.NumBindings(); - module = it->second; - } - - const u64 full_hash = HashCombine(hash, stage_key); - return std::make_tuple(&info, module, full_hash); - } private: const Instance& instance; @@ -98,17 +39,13 @@ private: AmdGpu::Liverpool* liverpool; vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineLayout pipeline_layout; - tsl::robin_map program_cache; + std::unique_ptr shader_cache; tsl::robin_map> compute_pipelines; tsl::robin_map> graphics_pipelines; std::array infos{}; std::array modules{}; - Shader::Profile profile{}; GraphicsPipelineKey graphics_key{}; u64 compute_key{}; - Common::ObjectPool inst_pool; - Common::ObjectPool block_pool; - Common::ObjectPool program_pool; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.cpp b/src/video_core/renderer_vulkan/vk_shader_cache.cpp new file mode 100644 index 00000000..3bf599a7 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_cache.cpp @@ -0,0 +1,152 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/recompiler.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_platform.h" +#include "video_core/renderer_vulkan/vk_shader_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +namespace Vulkan { + +using Shader::VsOutput; + +void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { + const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { + if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || + w != VsOutput::None) { + info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); + } + }; + // VS_OUT_MISC_VEC + add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, + ctl.use_vtx_edge_flag + ? VsOutput::EdgeFlag + : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), + ctl.use_vtx_kill_flag + ? VsOutput::KillFlag + : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), + ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); + // VS_OUT_CCDIST0 + add_output(ctl.IsClipDistEnabled(0) + ? VsOutput::ClipDist0 + : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), + ctl.IsClipDistEnabled(1) + ? VsOutput::ClipDist1 + : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), + ctl.IsClipDistEnabled(2) + ? VsOutput::ClipDist2 + : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), + ctl.IsClipDistEnabled(3) + ? VsOutput::ClipDist3 + : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); + // VS_OUT_CCDIST1 + add_output(ctl.IsClipDistEnabled(4) + ? VsOutput::ClipDist4 + : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), + ctl.IsClipDistEnabled(5) + ? VsOutput::ClipDist5 + : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), + ctl.IsClipDistEnabled(6) + ? VsOutput::ClipDist6 + : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), + ctl.IsClipDistEnabled(7) + ? VsOutput::ClipDist7 + : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); +} + +Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, u64 pgm_base, + u64 hash, const AmdGpu::Liverpool::Regs& regs) { + Shader::Info info{}; + info.user_data = user_data; + info.pgm_base = pgm_base; + info.pgm_hash = hash; + info.stage = stage; + switch (stage) { + case Shader::Stage::Vertex: { + info.num_user_data = regs.vs_program.settings.num_user_regs; + info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; + BuildVsOutputs(info, regs.vs_output_control); + break; + } + case Shader::Stage::Fragment: { + info.num_user_data = regs.ps_program.settings.num_user_regs; + for (u32 i = 0; i < regs.num_interp; i++) { + info.ps_inputs.push_back({ + .param_index = regs.ps_inputs[i].input_offset.Value(), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = regs.ps_inputs[i].default_value, + }); + } + break; + } + case Shader::Stage::Compute: { + const auto& cs_pgm = regs.cs_program; + info.num_user_data = cs_pgm.settings.num_user_regs; + info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, + cs_pgm.num_thread_z.full}; + info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), + cs_pgm.IsTgidEnabled(2)}; + info.shared_memory_size = cs_pgm.SharedMemSize(); + break; + } + default: + break; + } + return info; +} + +ShaderCache::ShaderCache(const Instance& instance_, AmdGpu::Liverpool* liverpool_) + : instance{instance_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { + profile = Shader::Profile{ + .supported_spirv = 0x00010600U, + .subgroup_size = instance.SubgroupSize(), + .support_explicit_workgroup_layout = true, + }; +} + +vk::ShaderModule ShaderCache::CompileModule(Shader::Info& info, std::span code, + size_t perm_idx, u32& binding) { + LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation)" : ""); + + if (Config::dumpShaders()) { + DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); + } + + block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); + + // Compile IR to SPIR-V + const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); + if (Config::dumpShaders()) { + DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + } + + // Create module and set name to hash in renderdoc + const auto module = CompileSPV(spv, instance.GetDevice()); + ASSERT(module != VK_NULL_HANDLE); + const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); + Vulkan::SetObjectName(instance.GetDevice(), module, name); + return module; +} + +void ShaderCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, + size_t perm_idx, std::string_view ext) { + using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } + const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteSpan(code); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.h b/src/video_core/renderer_vulkan/vk_shader_cache.h new file mode 100644 index 00000000..387bd45c --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_cache.h @@ -0,0 +1,184 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/object_pool.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/liverpool.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; + +struct BufferSpecialization { + u16 stride : 14; + u16 is_storage : 1; + + auto operator<=>(const BufferSpecialization&) const = default; +}; + +struct TextureBufferSpecialization { + bool is_integer; + + auto operator<=>(const TextureBufferSpecialization&) const = default; +}; + +struct ImageSpecialization { + AmdGpu::ImageType type; + bool is_integer; + + auto operator<=>(const ImageSpecialization&) const = default; +}; + +struct StageSpecialization { + static constexpr size_t MaxStageResources = 32; + + const Shader::Info* info; + std::bitset bitset{}; + boost::container::small_vector buffers; + boost::container::small_vector tex_buffers; + boost::container::small_vector images; + u32 start_binding{}; + + void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { + for (const auto& desc : desc_list) { + auto& spec = spec_list.emplace_back(); + const auto sharp = desc.GetSharp(*info); + if (!sharp) { + binding++; + continue; + } + bitset.set(binding++); + func(spec, desc, sharp); + } + } + + StageSpecialization(const Shader::Info& info_, u32 start_binding_) + : info{&info_}, start_binding{start_binding_} { + u32 binding{}; + ForEachSharp(binding, buffers, info->buffers, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.stride = sharp.GetStride(); + spec.is_storage = desc.IsStorage(sharp); + }); + ForEachSharp(binding, tex_buffers, info->texture_buffers, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + }); + ForEachSharp(binding, images, info->images, + [](auto& spec, const auto& desc, AmdGpu::Image sharp) { + spec.type = sharp.GetType(); + spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + }); + } + + bool operator==(const StageSpecialization& other) const { + if (start_binding != other.start_binding) { + return false; + } + u32 binding{}; + for (u32 i = 0; i < buffers.size(); i++) { + if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { + return false; + } + } + for (u32 i = 0; i < tex_buffers.size(); i++) { + if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) { + return false; + } + } + for (u32 i = 0; i < images.size(); i++) { + if (other.bitset[binding++] && images[i] != other.images[i]) { + return false; + } + } + return true; + } +}; + +struct Program { + struct Module { + vk::ShaderModule module; + StageSpecialization spec; + }; + + Shader::Info info; + boost::container::small_vector modules; +}; + +Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, u64 pgm_base, + u64 hash, const AmdGpu::Liverpool::Regs& regs); + +[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + +class ShaderCache { +public: + explicit ShaderCache(const Instance& instance, AmdGpu::Liverpool* liverpool); + ~ShaderCache() = default; + + void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, + std::string_view ext); + + vk::ShaderModule CompileModule(Shader::Info& info, std::span code, size_t perm_idx, + u32& binding); + + std::tuple GetProgram(const auto* pgm, + Shader::Stage stage, + u32& binding) { + // Fetch program for binaryinfo hash. + const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm); + const u64 hash = bininfo->shader_hash; + auto [it_pgm, new_program] = program_cache.try_emplace(hash); + u64 stage_key{}; + if (new_program) { + const VAddr pgm_base = pgm->template Address(); + auto program = program_pool.Create(); + program->info = MakeShaderInfo(stage, pgm->user_data, pgm_base, hash, liverpool->regs); + u32 start_binding = binding; + const auto module = CompileModule(program->info, pgm->Code(), 0, binding); + program->modules.emplace_back(module, + StageSpecialization{program->info, start_binding}); + it_pgm.value() = program; + return std::make_tuple(&program->info, module, HashCombine(hash, 0)); + } + + Program* program = it_pgm->second; + const auto& info = program->info; + size_t perm_idx = program->modules.size(); + StageSpecialization spec{info, binding}; + vk::ShaderModule module{}; + + const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); + if (it == program->modules.end()) { + auto new_info = MakeShaderInfo(stage, pgm->user_data, info.pgm_base, info.pgm_hash, + liverpool->regs); + module = CompileModule(new_info, pgm->Code(), perm_idx, binding); + program->modules.emplace_back(module, std::move(spec)); + } else { + binding += info.NumBindings(); + module = it->module; + perm_idx = std::distance(program->modules.begin(), it); + } + return std::make_tuple(&info, module, HashCombine(hash, perm_idx)); + } + +private: + const Instance& instance; + AmdGpu::Liverpool* liverpool; + Shader::Profile profile{}; + tsl::robin_map program_cache; + Common::ObjectPool inst_pool; + Common::ObjectPool block_pool; + Common::ObjectPool program_pool; +}; + +} // namespace Vulkan