From d59b102b6f42cecad42b11e6573e4e8e31d4aaee Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 27 May 2024 01:07:46 +0300 Subject: [PATCH 1/2] video_core: Add image support --- CMakeLists.txt | 2 + src/common/config.cpp | 27 ++- src/common/config.h | 1 + src/core/libraries/gnmdriver/gnmdriver.cpp | 41 ++-- src/core/libraries/kernel/event_queue.cpp | 1 - src/core/libraries/kernel/libkernel.cpp | 3 +- src/core/memory.cpp | 2 + .../backend/spirv/emit_spirv.cpp | 4 +- .../backend/spirv/emit_spirv.h | 8 +- .../spirv/emit_spirv_context_get_set.cpp | 31 ++- .../backend/spirv/emit_spirv_image.cpp | 11 +- .../backend/spirv/emit_spirv_instructions.h | 21 +- .../backend/spirv/spirv_emit_context.cpp | 125 ++++++++++-- .../backend/spirv/spirv_emit_context.h | 18 +- .../frontend/translate/scalar_memory.cpp | 17 +- .../frontend/translate/translate.cpp | 41 +++- .../frontend/translate/translate.h | 6 + .../frontend/translate/vector_alu.cpp | 43 +++- .../frontend/translate/vector_memory.cpp | 27 ++- src/shader_recompiler/ir/attribute.cpp | 2 + src/shader_recompiler/ir/ir_emitter.cpp | 10 +- src/shader_recompiler/ir/ir_emitter.h | 3 +- src/shader_recompiler/ir/opcodes.inc | 3 +- .../ir/passes/constant_propogation_pass.cpp | 18 +- .../ir/passes/resource_tracking_pass.cpp | 97 ++++++++- src/shader_recompiler/ir/reg.h | 12 +- src/shader_recompiler/recompiler.cpp | 7 - src/shader_recompiler/recompiler.h | 19 -- src/shader_recompiler/runtime_info.h | 21 +- src/video_core/amdgpu/liverpool.h | 73 ++++++- src/video_core/amdgpu/resource.h | 192 ++++++++++++++++++ .../renderer_vulkan/liverpool_to_vk.cpp | 174 +++++++++++++++- .../renderer_vulkan/liverpool_to_vk.h | 17 ++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 142 ++++++++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 11 +- .../renderer_vulkan/vk_instance.cpp | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 57 ++++-- .../renderer_vulkan/vk_pipeline_cache.h | 4 + .../renderer_vulkan/vk_rasterizer.cpp | 38 ++-- src/video_core/texture_cache/image.cpp | 31 ++- src/video_core/texture_cache/image.h | 2 + src/video_core/texture_cache/image_view.cpp | 51 +++++ src/video_core/texture_cache/image_view.h | 4 + src/video_core/texture_cache/sampler.cpp | 32 +++ src/video_core/texture_cache/sampler.h | 34 ++++ .../texture_cache/texture_cache.cpp | 26 ++- src/video_core/texture_cache/texture_cache.h | 9 + src/video_core/texture_cache/tile_manager.cpp | 1 + 48 files changed, 1264 insertions(+), 259 deletions(-) create mode 100644 src/video_core/texture_cache/sampler.cpp create mode 100644 src/video_core/texture_cache/sampler.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 150c1001..520ba4c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -401,6 +401,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/texture_cache/image.h src/video_core/texture_cache/image_view.cpp src/video_core/texture_cache/image_view.h + src/video_core/texture_cache/sampler.cpp + src/video_core/texture_cache/sampler.h src/video_core/texture_cache/slot_vector.h src/video_core/texture_cache/texture_cache.cpp src/video_core/texture_cache/texture_cache.h diff --git a/src/common/config.cpp b/src/common/config.cpp index afabba03..2da0844e 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -9,16 +9,17 @@ namespace Config { -bool isNeo = false; -u32 screenWidth = 1280; -u32 screenHeight = 720; -s32 gpuId = -1; // Vulkan physical device index. 
Set to negative for auto select -std::string logFilter; -std::string logType = "sync"; -bool isDebugDump = false; -bool isLibc = true; -bool isShowSplash = false; -bool isNullGpu = false; +static bool isNeo = false; +static u32 screenWidth = 1280; +static u32 screenHeight = 720; +static s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select +static std::string logFilter; +static std::string logType = "sync"; +static bool isDebugDump = false; +static bool isLibc = true; +static bool isShowSplash = false; +static bool isNullGpu = false; +static bool shouldDumpShaders = false; bool isLleLibc() { return isLibc; @@ -59,6 +60,10 @@ bool nullGpu() { return isNullGpu; } +bool dumpShaders() { + return shouldDumpShaders; +} + void load(const std::filesystem::path& path) { // If the configuration file does not exist, create it and return std::error_code error; @@ -96,6 +101,7 @@ void load(const std::filesystem::path& path) { screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); gpuId = toml::find_or(gpu, "gpuId", 0); isNullGpu = toml::find_or(gpu, "nullGpu", false); + shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); } } if (data.contains("Debug")) { @@ -142,6 +148,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["nullGpu"] = isNullGpu; + data["GPU"]["dumpShaders"] = shouldDumpShaders; data["Debug"]["DebugDump"] = isDebugDump; data["LLE"]["libc"] = isLibc; diff --git a/src/common/config.h b/src/common/config.h index 803e9409..8a8db451 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -22,5 +22,6 @@ bool debugDump(); bool isLleLibc(); bool showSplash(); bool nullGpu(); +bool dumpShaders(); }; // namespace Config diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 008ae189..67558945 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -881,29 +881,36 @@ int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier) { +s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, + u32 shader_modifier) { LOG_TRACE(Lib_GnmDriver, "called"); // A fullscreen triangle with one uv set - const static u32 shader_code[] = { - 0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007 - 0x36020081u, // v_and_b32 v1, 1, v0 - 0x34020281u, // v_lshlrev_b32 v1, 1, v1 - 0x360000c2u, // v_and_b32 v0, -2, v0 - 0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1 - 0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0 - 0x7e020b01u, // v_cvt_f32_i32 v1, v1 - 0x7e040280u, // v_cvt_f32_i32 v0, v0 - 0x7e0602f2u, // v_mov_b32 v3, 1.0 - 0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done - 0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3 - 0xbf810000u, // s_endpgm + // clang-format off + constexpr static std::array shader_code alignas(256) = { + 0xbeeb03ffu, 0x00000009u, // s_mov_b32 vcc_hi, lit(9) + 0x36020081u, // v_and_b32 v1, 1, v0 + 0x36000082u, // v_and_b32 v0, 2, v0 + 0x7e000d00u, // v_cvt_f32_u32 v0, v0 + 0x7e040d01u, // v_cvt_f32_u32 v2, v1 + 0xd2820003u, 0x3ce00f4u, // v_mad_f32 v3, 2.0, v0, -1.0 + 0xd2820004u, 0x3ce04f6u, // v_mad_f32 v4, 4.0, v2, -1.0 + 0x7e020280u, // v_mov_b32 v1, 0 + 0x7e0a02f2u, // v_mov_b32 v5, 1.0 + 0xf80008cfu, 0x5010403u, // exp pos0, v3, v4, v1, v5 done + 0x100404f4u, // v_mul_f32 v2, 2.0, v2 + 0xf800020fu, 
0x1010200u, // exp param0, v0, v2, v1, v1 + 0xbf810000u, // s_endpgm + 0x302u, + 0x46d611cu, // OrbShdr header - 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, - 0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u}; + 0x5362724fu, 0x7726468u, 0x4845u, 0x5080002u, 0xd1e7de61u, 0x0u, 0xb9cae598u, + }; + // clang-format on - const auto shader_addr = uintptr_t(&shader_code); // Original address is 0xfe000f10 + const auto shader_addr = uintptr_t(shader_code.data()); // Original address is 0xfe000f10 + ASSERT((shader_addr & 0xFF) == 0); const static u32 vs_regs[] = { u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7}; diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 9243609d..faa29541 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" -#include "common/debug.h" #include "core/libraries/kernel/event_queue.h" namespace Libraries::Kernel { diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index 0f847f1a..3f3a3624 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -35,7 +35,8 @@ static void* PS4_SYSV_ABI sceKernelGetProcParam() { } int32_t PS4_SYSV_ABI sceKernelReleaseDirectMemory(off_t start, size_t len) { - UNREACHABLE(); + auto* memory = Core::Memory::Instance(); + memory->Free(start, len); return 0; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 83758688..7838b9cc 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -199,6 +199,7 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) { } void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { + return; const vk::Device device = instance->GetDevice(); const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties(); void* host_pointer = reinterpret_cast(addr); @@ -270,6 +271,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { } void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) { + return; const auto it = mapped_memories.find(addr); ASSERT(it != mapped_memories.end() && it->second.buffer_size == size); mapped_memories.erase(it); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index fb9c67d6..39aea9c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -218,8 +218,8 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& bindings) { - EmitContext ctx{profile, program, bindings}; +std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { + EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); if (program.info.stage == Stage::Vertex) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index f99ad86d..e513975b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,18 +4,12 @@ #pragma once #include -#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" namespace 
Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, - Bindings& bindings); - -[[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { - Bindings binding; - return EmitSPIRV(profile, program, binding); -} + u32& binding); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 1acfebe8..b5011218 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -61,14 +61,11 @@ Id EmitReadConst(EmitContext& ctx) { throw LogicError("Unreachable instruction"); } -Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& binding, const IR::Value& addr, - const IR::Value& offset) { - throw LogicError("Unreachable instruction"); -} - -Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& addr, - const IR::Value& offset) { - throw LogicError("Unreachable instruction"); +Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { + const Id buffer = ctx.buffers[handle]; + const Id type = ctx.info.buffers[handle].is_storage ? ctx.storage_f32 : ctx.uniform_f32; + const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)}; + return ctx.OpLoad(ctx.F32[1], ptr); } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { @@ -99,32 +96,28 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen ctx.OpStore(pointer, value); } -Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address) { +Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { UNREACHABLE(); } -Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address) { +Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { UNREACHABLE(); } -Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address) { +Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { UNREACHABLE(); } -Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address) { +Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { const auto info = inst->Flags(); - const Id buffer = ctx.buffers[handle.U32()]; - const Id type = ctx.info.buffers[handle.U32()].is_storage ? ctx.storage_f32 : ctx.uniform_f32; + const Id buffer = ctx.buffers[handle]; + const Id type = ctx.info.buffers[handle].is_storage ? 
ctx.storage_f32 : ctx.uniform_f32; if (info.index_enable && info.offset_enable) { UNREACHABLE(); } else if (info.index_enable) { boost::container::static_vector ids; for (u32 i = 0; i < 4; i++) { - const Id index{ctx.OpIAdd(ctx.U32[1], ctx.Def(address), ctx.ConstU32(i))}; + const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))}; const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)}; ids.push_back(ctx.OpLoad(ctx.F32[1], ptr)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 68066916..ac8f22af 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -6,9 +6,14 @@ namespace Shader::Backend::SPIRV { -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, const IR::Value& offset) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, + Id offset) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id image = ctx.OpLoad(texture.image_type, texture.id); + const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); + const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); + const auto info = inst->Flags(); + return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords); } Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 0d23312d..77416e7f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -43,18 +43,11 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetScc(EmitContext& ctx); Id EmitReadConst(EmitContext& ctx); -Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, - const IR::Value& offset); -Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, - const IR::Value& offset); -Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address); -Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address); -Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address); -Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle, - const IR::Value& address); +Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index); +Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); @@ -319,8 +312,8 @@ Id EmitConvertF64U16(EmitContext& ctx, Id value); Id EmitConvertF64U32(EmitContext& ctx, Id value); Id EmitConvertF64U64(EmitContext& ctx, Id 
value); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, const IR::Value& offset); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, + Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod, const IR::Value& offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index a7d3725c..4b9e696e 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -35,17 +35,14 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace -EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding_) : Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_}, - stage{program.info.stage} { - u32& uniform_binding{bindings.unified}; - u32& storage_binding{bindings.unified}; - u32& texture_binding{bindings.unified}; - u32& image_binding{bindings.unified}; + stage{program.info.stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(program); DefineBuffers(program.info); + DefineImagesAndSamplers(program.info); } EmitContext::~EmitContext() = default; @@ -235,16 +232,15 @@ void EmitContext::DefineOutputs(const Info& info) { } void EmitContext::DefineBuffers(const Info& info) { - const auto define_buffer = [&](const BufferResource& buffer, Id type, u32 element_size, - char type_char, u32 index) { - ASSERT(buffer.stride % element_size == 0); - const u32 num_elements = buffer.stride * buffer.num_records / element_size; + for (u32 i = 0; const auto& buffer : info.buffers) { + ASSERT(True(buffer.used_types & IR::Type::F32)); + ASSERT(buffer.stride % sizeof(float) == 0); + const u32 num_elements = buffer.stride * buffer.num_records / sizeof(float); const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))}; - Decorate(record_array_type, spv::Decoration::ArrayStride, element_size); + Decorate(record_array_type, spv::Decoration::ArrayStride, sizeof(float)); const Id struct_type{TypeStruct(record_array_type)}; - const auto name = - fmt::format("{}_cbuf_block_{}{}", stage, type_char, element_size * CHAR_BIT); + const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); Name(struct_type, name); Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "data"); @@ -254,25 +250,112 @@ void EmitContext::DefineBuffers(const Info& info) { buffer.is_storage ? 
spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform; const Id struct_pointer_type{TypePointer(storage_class, struct_type)}; if (buffer.is_storage) { - storage_f32 = TypePointer(storage_class, type); + storage_f32 = TypePointer(storage_class, F32[1]); } else { - uniform_f32 = TypePointer(storage_class, type); + uniform_f32 = TypePointer(storage_class, F32[1]); } const Id id{AddGlobalVariable(struct_pointer_type, storage_class)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("c{}", index)); + Name(id, fmt::format("c{}", i)); binding++; buffers.push_back(id); interfaces.push_back(id); - }; - - for (u32 i = 0; const auto& buffer : info.buffers) { - ASSERT(True(buffer.used_types & IR::Type::F32)); - define_buffer(buffer, F32[1], 4, 'f', i); i++; } } +Id ImageType(EmitContext& ctx, const ImageResource& desc) { + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + const Id type{ctx.F32[1]}; + const bool depth{desc.is_depth}; + switch (desc.type) { + case AmdGpu::ImageType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format, + spv::AccessQualifier::ReadOnly); + case AmdGpu::ImageType::Color1DArray: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format, + spv::AccessQualifier::ReadOnly); + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color2DMsaa: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, + desc.type == AmdGpu::ImageType::Color2DMsaa, 1, format, + spv::AccessQualifier::ReadOnly); + case AmdGpu::ImageType::Color2DArray: + case AmdGpu::ImageType::Color2DMsaaArray: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, + desc.type == AmdGpu::ImageType::Color2DMsaaArray, 1, format, + spv::AccessQualifier::ReadOnly); + case AmdGpu::ImageType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format, + spv::AccessQualifier::ReadOnly); + case AmdGpu::ImageType::Cube: + return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format, + spv::AccessQualifier::ReadOnly); + case AmdGpu::ImageType::Buffer: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { + const auto format = spv::ImageFormat::Unknown; // Read this from tsharp? + switch (desc.type) { + case AmdGpu::ImageType::Color1D: + return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 1, format); + case AmdGpu::ImageType::Color1DArray: + return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 1, format); + case AmdGpu::ImageType::Color2D: + return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 1, format); + case AmdGpu::ImageType::Color2DArray: + return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 1, format); + case AmdGpu::ImageType::Color3D: + return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, 2, format); + case AmdGpu::ImageType::Buffer: + throw NotImplementedException("Image buffer"); + default: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +void EmitContext::DefineImagesAndSamplers(const Info& info) { + for (const auto& image_desc : info.images) { + const Id sampled_type{image_desc.nfmt == AmdGpu::NumberFormat::Uint ? 
U32[1] : F32[1]}; + const Id image_type{ImageType(*this, image_desc, sampled_type)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base, + image_desc.dword_offset)); + images.push_back({ + .id = id, + .sampled_type = TypeSampledImage(image_type), + .pointer_type = pointer_type, + .image_type = image_type, + }); + interfaces.push_back(id); + ++binding; + } + + if (info.samplers.empty()) { + return; + } + + sampler_type = TypeSampler(); + sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type); + for (const auto& samp_desc : info.samplers) { + const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base, + samp_desc.dword_offset)); + samplers.push_back(id); + interfaces.push_back(id); + ++binding; + } +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 1baf7fa1..ba0deca2 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -6,7 +6,6 @@ #include #include -#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" @@ -29,7 +28,7 @@ struct VectorIds { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding); + explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); ~EmitContext(); Id Def(const IR::Value& value); @@ -152,8 +151,20 @@ public: Id base_vertex{}; std::array frag_color{}; - u32 binding{}; + struct TextureDefinition { + Id id; + Id sampled_type; + Id pointer_type; + Id image_type; + }; + + u32& binding; boost::container::small_vector buffers; + boost::container::small_vector images; + boost::container::small_vector samplers; + + Id sampler_type{}; + Id sampler_pointer_type{}; struct SpirvAttribute { Id id; @@ -170,6 +181,7 @@ private: void DefineInputs(const Info& info); void DefineOutputs(const Info& info); void DefineBuffers(const Info& info); + void DefineImagesAndSamplers(const Info& info); SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); }; diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp index dc02dfd2..a3a2a676 100644 --- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -8,15 +8,18 @@ namespace Shader::Gcn { void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg, const IR::U32U64& address) { for (u32 i = 0; i < num_dwords; i++) { - const IR::U32 value = handle.IsEmpty() ? 
ir.ReadConst(address, ir.Imm32(i)) - : ir.ReadConstBuffer(handle, address, ir.Imm32(i)); - ir.SetScalarReg(dst_reg++, value); + if (handle.IsEmpty()) { + ir.SetScalarReg(dst_reg++, ir.ReadConst(address, ir.Imm32(i))); + } else { + const IR::U32 index = ir.IAdd(address, ir.Imm32(i)); + ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(handle, index)); + } } } void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { const auto& smrd = inst.control.smrd; - const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2); + const IR::ScalarReg sbase{inst.src[0].code * 2}; const IR::U32 offset = smrd.imm ? ir.Imm32(smrd.offset * 4) : IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), @@ -30,14 +33,12 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { const auto& smrd = inst.control.smrd; - const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2); + const IR::ScalarReg sbase{inst.src[0].code * 2}; const IR::U32 offset = smrd.imm ? ir.Imm32(smrd.offset * 4) : IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2))}; - const IR::Value vsharp = - ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1), - ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3)); + const IR::Value vsharp = ir.GetScalarReg(sbase); const IR::ScalarReg dst_reg{inst.dst[0].code}; Load(ir, num_dwords, vsharp, dst_reg, offset); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 453bdcc2..cd42d1e0 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -73,8 +73,14 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { return ir.Imm32(1.f); case OperandField::ConstFloatPos_0_5: return ir.Imm32(0.5f); + case OperandField::ConstFloatPos_2_0: + return ir.Imm32(2.0f); + case OperandField::ConstFloatPos_4_0: + return ir.Imm32(4.0f); case OperandField::ConstFloatNeg_0_5: return ir.Imm32(-0.5f); + case OperandField::ConstFloatNeg_1_0: + return ir.Imm32(-1.0f); default: UNREACHABLE(); } @@ -135,6 +141,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_MUL_I32: translator.S_MUL_I32(inst); break; + case Opcode::V_MAD_F32: + translator.V_MAD_F32(inst); + break; case Opcode::V_MOV_B32: translator.V_MOV(inst); break; @@ -144,12 +153,39 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MUL_F32: translator.V_MUL_F32(inst); break; + case Opcode::V_AND_B32: + translator.V_AND_B32(inst); + break; + case Opcode::V_LSHLREV_B32: + translator.V_LSHLREV_B32(inst); + break; + case Opcode::V_ADD_I32: + translator.V_ADD_I32(inst); + break; + case Opcode::V_CVT_F32_I32: + translator.V_CVT_F32_I32(inst); + break; + case Opcode::V_CVT_F32_U32: + translator.V_CVT_F32_U32(inst); + break; case Opcode::S_SWAPPC_B64: ASSERT(info.stage == Stage::Vertex); translator.EmitFetch(inst); break; case Opcode::S_WAITCNT: - break; // Ignore for now. 
+ break; + case Opcode::S_BUFFER_LOAD_DWORD: + translator.S_BUFFER_LOAD_DWORD(1, inst); + break; + case Opcode::S_BUFFER_LOAD_DWORDX2: + translator.S_BUFFER_LOAD_DWORD(2, inst); + break; + case Opcode::S_BUFFER_LOAD_DWORDX4: + translator.S_BUFFER_LOAD_DWORD(4, inst); + break; + case Opcode::S_BUFFER_LOAD_DWORDX8: + translator.S_BUFFER_LOAD_DWORD(8, inst); + break; case Opcode::S_BUFFER_LOAD_DWORDX16: translator.S_BUFFER_LOAD_DWORD(16, inst); break; @@ -180,7 +216,8 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_ENDPGM: break; default: - UNREACHABLE_MSG("Unknown opcode {}", u32(inst.opcode)); + const u32 opcode = u32(inst.opcode); + UNREACHABLE_MSG("Unknown opcode {}", opcode); } } } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index ca3166eb..a837f3a1 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -47,6 +47,12 @@ public: void V_MUL_F32(const GcnInst& inst); void V_CMP_EQ_U32(const GcnInst& inst); void V_CNDMASK_B32(const GcnInst& inst); + void V_AND_B32(const GcnInst& inst); + void V_LSHLREV_B32(const GcnInst& inst); + void V_ADD_I32(const GcnInst& inst); + void V_CVT_F32_I32(const GcnInst& inst); + void V_CVT_F32_U32(const GcnInst& inst); + void V_MAD_F32(const GcnInst& inst); // Vector Memory void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 99cebdd2..9e36cdc3 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#pragma clang optimize off + #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -61,4 +61,45 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, IR::U32F32{result}); } +void Translator::V_AND_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.BitwiseAnd(src0, src1)); +} + +void Translator::V_LSHLREV_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, src0)); +} + +void Translator::V_ADD_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.IAdd(src0, src1)); + // TODO: Carry +} + +void Translator::V_CVT_F32_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.ConvertSToF(32, 32, src0)); +} + +void Translator::V_CVT_F32_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.ConvertUToF(32, 32, src0)); +} + +void Translator::V_MAD_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; + 
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index d87e957a..a5fb50b9 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -63,27 +63,34 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler // Since these are at most 4 dwords, we load them into a single uvec4 and place them // in coords field of the instruction. Then the resource tracking pass will patch the - // IR instruction to fill in lod_clamp field. The vector can also be used - // as coords directly as SPIR-V will ignore any extra parameters. - const IR::Value body = - ir.CompositeConstruct(ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++), - ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++)); + // IR instruction to fill in lod_clamp field. + const IR::Value body = ir.CompositeConstruct( + ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), + ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); + + const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod); + + IR::TextureInstInfo info{}; + info.is_depth.Assign(flags.test(MimgModifier::Pcf)); + info.has_bias.Assign(flags.test(MimgModifier::LodBias)); + info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); + info.force_level0.Assign(flags.test(MimgModifier::Level0)); + info.explicit_lod.Assign(explicit_lod); // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { const IR::F32 lod = flags.test(MimgModifier::Level0) ? 
ir.Imm32(0.f) : IR::F32{}; - const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod); if (!flags.test(MimgModifier::Pcf)) { if (explicit_lod) { - return ir.ImageSampleExplicitLod(handle, body, lod, offset, {}); + return ir.ImageSampleExplicitLod(handle, body, lod, offset, info); } else { - return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, {}); + return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info); } } if (explicit_lod) { - return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, {}); + return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info); } - return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, {}); + return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info); }(); for (u32 i = 0; i < 4; i++) { diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 3b60bf65..8e0db125 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -110,6 +110,8 @@ std::string NameOf(Attribute attribute) { return "InstanceId"; case Attribute::FragCoord: return "FragCoord"; + case Attribute::IsFrontFace: + return "IsFrontFace"; default: break; } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 26c9ce2a..69ad9a3c 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -227,14 +227,8 @@ U32 IREmitter::ReadConst(const U64& address, const U32& offset) { return Inst(Opcode::ReadConst, address, offset); } -template <> -U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) { - return Inst(Opcode::ReadConstBuffer, handle, index, offset); -} - -template <> -F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) { - return Inst(Opcode::ReadConstBufferF32, handle, index, offset); +F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) { + return Inst(Opcode::ReadConstBuffer, handle, index); } Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address, diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index a445f06a..a6023646 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -68,8 +68,7 @@ public: void WriteShared(int bit_size, const Value& value, const U32& offset); [[nodiscard]] U32 ReadConst(const U64& address, const U32& offset); - template - [[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset); + [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, BufferInstInfo info); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 929fac42..4a6e0447 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -15,8 +15,7 @@ OPCODE(Epilogue, Void, // Constant memory operations OPCODE(ReadConst, U32, U64, U32, ) -OPCODE(ReadConstBuffer, U32, Opaque, U32, U32 ) -OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 ) +OPCODE(ReadConstBuffer, F32, Opaque, U32, ) // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, ) diff --git a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp index 40ee1c2b..82f074dd 100644 --- 
a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp @@ -88,15 +88,15 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } - if constexpr (op == IR::Opcode::BitCastF32U32) { - if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) { - // Replace the bitcast with a typed constant buffer read - inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32); - inst.SetArg(0, arg_inst->Arg(0)); - inst.SetArg(1, arg_inst->Arg(1)); - return; - } - } + // if constexpr (op == IR::Opcode::BitCastF32U32) { + // if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) { + // // Replace the bitcast with a typed constant buffer read + // inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32); + // inst.SetArg(0, arg_inst->Arg(0)); + // inst.SetArg(1, arg_inst->Arg(1)); + // return; + // } + // } } std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 79de4680..e0a66232 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -28,7 +28,6 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::LoadBufferF32x3: case IR::Opcode::LoadBufferF32x4: case IR::Opcode::ReadConstBuffer: - case IR::Opcode::ReadConstBufferF32: return true; default: return false; @@ -41,6 +40,7 @@ IR::Type BufferLoadType(const IR::Inst& inst) { case IR::Opcode::LoadBufferF32x2: case IR::Opcode::LoadBufferF32x3: case IR::Opcode::LoadBufferF32x4: + case IR::Opcode::ReadConstBuffer: return IR::Type::F32; default: UNREACHABLE(); @@ -69,8 +69,10 @@ bool IsImageInstruction(const IR::Inst& inst) { class Descriptors { public: - explicit Descriptors(BufferResourceList& buffer_resources_) - : buffer_resources{buffer_resources_} {} + explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_, + SamplerResourceList& sampler_resources_) + : buffer_resources{buffer_resources_}, image_resources{image_resources_}, + sampler_resources{sampler_resources_} {} u32 Add(const BufferResource& desc) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { @@ -84,6 +86,23 @@ public: return index; } + u32 Add(const ImageResource& desc) { + const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { + return desc.sgpr_base == existing.sgpr_base && + desc.dword_offset == existing.dword_offset && desc.type == existing.type && + desc.is_storage == existing.is_storage; + })}; + return index; + } + + u32 Add(const SamplerResource& desc) { + const u32 index{Add(sampler_resources, desc, [&desc](const auto& existing) { + return desc.sgpr_base == existing.sgpr_base && + desc.dword_offset == existing.dword_offset; + })}; + return index; + } + private: template static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { @@ -96,6 +115,8 @@ private: } BufferResourceList& buffer_resources; + ImageResourceList& image_resources; + SamplerResourceList& sampler_resources; }; } // Anonymous namespace @@ -118,8 +139,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) { // Retrieve SGPR that holds sbase inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive(); - ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetScalarRegister, - "Nested resource loads not supported"); + ASSERT_MSG(inst->GetOpcode() == 
IR::Opcode::GetUserData, "Nested resource loads not supported"); const IR::ScalarReg base = inst->Arg(0).ScalarReg(); // Return retrieved location. @@ -140,7 +160,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, .stride = u32(buffer.stride), .num_records = u32(buffer.num_records), .used_types = BufferLoadType(inst), - .is_storage = buffer.base_address % 64 != 0, + .is_storage = /*buffer.base_address % 64 != 0*/ true, }); const auto inst_info = inst.Flags(); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -151,6 +171,9 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float && inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32); } + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { + return; + } // Calculate buffer address. const u32 dword_stride = buffer.stride / sizeof(u32); const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32); @@ -160,19 +183,79 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, } else if (inst_info.index_enable) { const IR::U32 index{inst.Arg(1)}; address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address); + } else if (inst_info.offset_enable) { + const IR::U32 offset{inst.Arg(1)}; } inst.SetArg(1, address); } +void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { + IR::Inst* producer = inst.Arg(0).InstRecursive(); + ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); + + // Read image sharp. + const auto tsharp = TrackSharp(producer->Arg(0).InstRecursive()); + const auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); + const auto inst_info = inst.Flags(); + const u32 image_binding = descriptors.Add(ImageResource{ + .sgpr_base = tsharp.sgpr_base, + .dword_offset = tsharp.dword_offset, + .type = image.type, + .nfmt = static_cast(image.num_format.Value()), + .is_storage = false, + .is_depth = bool(inst_info.is_depth), + }); + + // Read sampler sharp. + const auto ssharp = TrackSharp(producer->Arg(1).InstRecursive()); + const u32 sampler_binding = descriptors.Add(SamplerResource{ + .sgpr_base = ssharp.sgpr_base, + .dword_offset = ssharp.dword_offset, + }); + + // Patch image handle + const u32 handle = image_binding | (sampler_binding << 16); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(0, ir.Imm32(handle)); + + // Now that we know the image type, adjust texture coordinate vector. + const IR::Inst* body = inst.Arg(1).InstRecursive(); + const auto [coords, arg] = [&] -> std::pair { + switch (image.type) { + case AmdGpu::ImageType::Color1D: + return {body->Arg(0), body->Arg(1)}; + case AmdGpu::ImageType::Color1DArray: + case AmdGpu::ImageType::Color2D: + return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)}; + case AmdGpu::ImageType::Color2DArray: + case AmdGpu::ImageType::Color3D: + case AmdGpu::ImageType::Cube: + return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; + default: + UNREACHABLE(); + } + }(); + inst.SetArg(1, coords); + + if (inst_info.has_lod_clamp) { + // Final argument contains lod_clamp + const u32 arg_pos = inst_info.is_depth ? 
5 : 4; + inst.SetArg(arg_pos, arg); + } +} + void ResourceTrackingPass(IR::Program& program) { auto& info = program.info; - Descriptors descriptors{info.buffers}; + Descriptors descriptors{info.buffers, info.images, info.samplers}; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (IsBufferInstruction(inst)) { PatchBufferInstruction(*block, inst, info, descriptors); continue; } + if (IsImageInstruction(inst)) { + PatchImageInstruction(*block, inst, info, descriptors); + } } } } diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 89e78532..1b268611 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -33,13 +33,11 @@ union Mode { union TextureInstInfo { u32 raw; - BitField<0, 16, u32> descriptor_index; - BitField<19, 1, u32> is_depth; - BitField<20, 1, u32> has_bias; - BitField<21, 1, u32> has_lod_clamp; - BitField<22, 1, u32> relaxed_precision; - BitField<23, 2, u32> gather_component; - BitField<25, 2, u32> num_derivatives; + BitField<0, 1, u32> is_depth; + BitField<1, 1, u32> has_bias; + BitField<2, 1, u32> has_lod_clamp; + BitField<3, 1, u32> force_level0; + BitField<4, 1, u32> explicit_lod; }; union BufferInstInfo { diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index cc3a0eb2..97fff8fe 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -38,11 +37,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, std::span code, const Info&& info); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 3f8e1516..672e21d8 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -10,7 +10,7 @@ #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" -#include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/resource.h" namespace Shader { @@ -53,6 +53,22 @@ struct BufferResource { }; using BufferResourceList = boost::container::static_vector; +struct ImageResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::ImageType type; + AmdGpu::NumberFormat nfmt; + bool is_storage; + bool is_depth; +}; +using ImageResourceList = boost::container::static_vector; + +struct SamplerResource { + u32 sgpr_base; + u32 dword_offset; +}; +using SamplerResourceList = boost::container::static_vector; + struct Info { struct VsInput { AmdGpu::NumberFormat fmt; @@ -101,6 +117,9 @@ struct Info { AttributeFlags stores{}; BufferResourceList buffers; + ImageResourceList images; + SamplerResourceList samplers; + std::span user_data; Stage stage; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index ad1984eb..9033fbab 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -63,6 +63,16 @@ struct Liverpool { const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8; return reinterpret_cast(addr); } + + std::span Code() const { + u32 code_size = 1; + const u32* code = Address(); + static constexpr 
std::string_view PostHeader = "OrbShdr"; + while (std::memcmp(code + code_size, PostHeader.data(), PostHeader.size()) != 0) { + code_size++; + } + return std::span{code, code_size}; + } }; union PsInputControl { @@ -228,7 +238,7 @@ struct Liverpool { enum class ZFormat : u32 { Invald = 0, Z16 = 1, - Z32Float = 2, + Z32Float = 3, }; enum class StencilFormat : u32 { @@ -353,8 +363,9 @@ struct Liverpool { BitField<0, 8, u32> base_addr_hi; u32 base_addr_lo; - VAddr Address() const { - return base_addr_lo | u64(base_addr_hi) << 32; + template + T Address() const { + return reinterpret_cast(base_addr_lo | u64(base_addr_hi) << 32); } }; @@ -448,6 +459,53 @@ struct Liverpool { u32 data_w; }; + struct BlendConstants { + float red; + float green; + float blue; + float alpha; + }; + + union BlendControl { + enum class BlendFactor : u32 { + Zero = 0, + One = 1, + SrcColor = 2, + OneMinusSrcColor = 3, + SrcAlpha = 4, + OneMinusSrcAlpha = 5, + DstAlpha = 6, + OneMinusDstAlpha = 7, + DstColor = 8, + OneMinusDstColor = 9, + SrcAlphaSaturate = 10, + ConstantColor = 13, + OneMinusConstantColor = 14, + Src1Color = 15, + InvSrc1Color = 16, + Src1Alpha = 17, + InvSrc1Alpha = 18, + ConstantAlpha = 19, + OneMinusConstantAlpha = 20, + }; + + enum class BlendFunc : u32 { + Add = 0, + Subtract = 1, + Min = 2, + Max = 3, + }; + + BitField<0, 5, BlendFactor> color_src_factor; + BitField<5, 3, BlendFunc> color_func; + BitField<8, 5, BlendFactor> color_dst_factor; + BitField<16, 5, BlendFactor> alpha_src_factor; + BitField<21, 3, BlendFunc> alpha_func; + BitField<24, 5, BlendFactor> alpha_dst_factor; + BitField<29, 1, u32> separate_alpha_blend; + BitField<30, 1, u32> enable; + }; + struct ColorBuffer { enum class EndianSwap : u32 { None = 0, @@ -577,7 +635,9 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2); std::array viewport_scissors; std::array viewport_depths; - INSERT_PADDING_WORDS(0xA10B - 0xA0D4); + INSERT_PADDING_WORDS(0xA105 - 0xA0D4); + BlendConstants blend_constants; + INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4); StencilControl stencil_control; StencilRefMask stencil_ref_front; StencilRefMask stencil_ref_back; @@ -593,7 +653,9 @@ struct Liverpool { ShaderPosFormat shader_pos_format; ShaderExportFormat z_export_format; ColorExportFormat color_export_format; - INSERT_PADDING_WORDS(0xA1F9 - 0xA1C3 - 3); + INSERT_PADDING_WORDS(0xA1E0 - 0xA1C3 - 3); + std::array blend_control; + INSERT_PADDING_WORDS(0xA1F9 - 0xA1E0 - 8); IndexBufferBase index_base_address; INSERT_PADDING_WORDS(1); u32 draw_initiator; @@ -732,6 +794,7 @@ static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6); static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5); +static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0); static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9); static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 86fa0559..71469005 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -31,4 +31,196 @@ struct Buffer { }; }; +enum class ImageType : u64 { + Buffer = 0, + Color1D = 8, + Color2D = 9, + Color3D = 10, + Cube = 11, + Color1DArray = 12, + Color2DArray = 13, + Color2DMsaa = 14, + Color2DMsaaArray = 15, +}; + +constexpr std::string_view NameOf(ImageType type) { + switch (type) { + case 
ImageType::Buffer: + return "Buffer"; + case ImageType::Color1D: + return "Color1D"; + case ImageType::Color2D: + return "Color2D"; + case ImageType::Color3D: + return "Color3D"; + case ImageType::Cube: + return "Cube"; + case ImageType::Color1DArray: + return "Color1DArray"; + case ImageType::Color2DArray: + return "Color2DArray"; + case ImageType::Color2DMsaa: + return "Color2DMsaa"; + case ImageType::Color2DMsaaArray: + return "Color2DMsaaArray"; + default: + return "Unknown"; + } +} + +struct Image { + union { + BitField<0, 40, u64> base_address; + BitField<40, 12, u64> min_lod; + BitField<52, 6, u64> data_format; + BitField<58, 4, u64> num_format; + BitField<62, 2, u64> mtype; + }; + union { + BitField<0, 14, u64> width; + BitField<14, 14, u64> height; + BitField<28, 3, u64> perf_modulation; + BitField<31, 1, u64> interlaced; + BitField<32, 3, u64> dst_sel_x; + BitField<35, 3, u64> dst_sel_y; + BitField<38, 3, u64> dst_sel_z; + BitField<41, 3, u64> dst_sel_w; + BitField<44, 4, u64> base_level; + BitField<48, 4, u64> last_level; + BitField<52, 5, u64> tiling_index; + BitField<57, 1, u64> pow2pad; + BitField<58, 1, u64> mtype2; + BitField<59, 1, u64> atc; + BitField<60, 4, ImageType> type; + }; + + VAddr Address() const { + return base_address << 8; + } + + DataFormat GetDataFmt() const noexcept { + return static_cast(data_format.Value()); + } + + NumberFormat GetNumberFmt() const noexcept { + return static_cast(num_format.Value()); + } +}; + +// 8.2.7. Image Sampler [RDNA 2 Instruction Set Architecture] +enum class ClampMode : u64 { + Wrap = 0, + Mirror = 1, + ClampLastTexel = 2, + MirrorOnceLastTexel = 3, + ClampHalfBorder = 4, + MirrorOnceHalfBorder = 5, + ClampBorder = 6, + MirrorOnceBorder = 7, +}; + +enum class AnisoRatio : u64 { + One, + Two, + Four, + Eight, + Sixteen, +}; + +enum class DepthCompare : u64 { + Never = 0, + Less = 1, + Equal = 2, + LessEqual = 3, + Greater = 4, + NotEqual = 5, + GreaterEqual = 6, + Always = 7, +}; + +enum class FilterMode : u64 { + Blend = 0, + Min = 1, + Max = 2, +}; + +enum class Filter : u64 { + Point = 0, + Bilinear = 1, + AnisoPoint = 2, + AnisoLinear = 3, +}; + +enum class MipFilter : u64 { + None = 0, + Point = 1, + Linear = 2, +}; + +enum class BorderColor : u64 { + OpaqueBlack = 0, + TransparentBlack = 1, + White = 2, + Custom = 3, +}; + +// Table 8.12 Sampler Resource Definition +struct Sampler { + union { + BitField<0, 3, ClampMode> clamp_x; + BitField<3, 3, ClampMode> clamp_y; + BitField<6, 3, ClampMode> clamp_z; + BitField<9, 3, AnisoRatio> max_aniso; + BitField<12, 3, DepthCompare> depth_compare_func; + BitField<15, 1, u64> force_unnormalized; + BitField<16, 3, u64> aniso_threshold; + BitField<19, 1, u64> mc_coord_trunc; + BitField<20, 1, u64> force_degamma; + BitField<21, 6, u64> aniso_bias; + BitField<27, 1, u64> trunc_coord; + BitField<28, 1, u64> disable_cube_wrap; + BitField<29, 2, FilterMode> filter_mode; + BitField<32, 12, u64> min_lod; + BitField<44, 12, u64> max_lod; + BitField<56, 4, u64> perf_mip; + BitField<60, 4, u64> perf_z; + }; + union { + BitField<0, 14, u64> lod_bias; + BitField<14, 6, u64> lod_bias_sec; + BitField<20, 2, Filter> xy_mag_filter; + BitField<22, 2, Filter> xy_min_filter; + BitField<24, 2, u64> z_filter; + BitField<26, 2, MipFilter> mip_filter; + BitField<28, 1, u64> mip_point_preclamp; + BitField<29, 1, u64> disable_lsb_ceil; + BitField<30, 2, u64> unused0; + BitField<32, 12, u64> border_color_ptr; + BitField<42, 18, u64> unused1; + BitField<62, 2, BorderColor> border_color_type; + }; + + float 
LodBias() const noexcept { + return static_cast(lod_bias); + } + + float MinLod() const noexcept { + return static_cast(min_lod); + } + + float MaxLod() const noexcept { + return static_cast(max_lod); + } +}; + } // namespace AmdGpu + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::ImageType type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 906b937e..315015cc 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -6,6 +6,8 @@ namespace Vulkan::LiverpoolToVK { +using DepthBuffer = Liverpool::DepthBuffer; + vk::StencilOp StencilOp(Liverpool::StencilFunc op) { switch (op) { case Liverpool::StencilFunc::Keep: @@ -77,6 +79,8 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { case Liverpool::PrimitiveType::QuadList: // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; + case Liverpool::PrimitiveType::RectList: + return vk::PrimitiveTopology::eTriangleList; default: UNREACHABLE(); return vk::PrimitiveTopology::eTriangleList; @@ -113,6 +117,161 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) { } } +vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) { + using BlendFactor = Liverpool::BlendControl::BlendFactor; + switch (factor) { + case BlendFactor::Zero: + return vk::BlendFactor::eZero; + case BlendFactor::One: + return vk::BlendFactor::eOne; + case BlendFactor::SrcColor: + return vk::BlendFactor::eSrcColor; + case BlendFactor::OneMinusSrcColor: + return vk::BlendFactor::eOneMinusSrcColor; + case BlendFactor::SrcAlpha: + return vk::BlendFactor::eSrcAlpha; + case BlendFactor::OneMinusSrcAlpha: + return vk::BlendFactor::eOneMinusSrcAlpha; + case BlendFactor::DstAlpha: + return vk::BlendFactor::eDstAlpha; + case BlendFactor::OneMinusDstAlpha: + return vk::BlendFactor::eOneMinusDstAlpha; + case BlendFactor::DstColor: + return vk::BlendFactor::eDstColor; + case BlendFactor::OneMinusDstColor: + return vk::BlendFactor::eOneMinusDstColor; + case BlendFactor::SrcAlphaSaturate: + return vk::BlendFactor::eSrcAlphaSaturate; + case BlendFactor::ConstantColor: + return vk::BlendFactor::eConstantColor; + case BlendFactor::OneMinusConstantColor: + return vk::BlendFactor::eOneMinusConstantColor; + case BlendFactor::Src1Color: + return vk::BlendFactor::eSrc1Color; + case BlendFactor::InvSrc1Color: + return vk::BlendFactor::eOneMinusSrc1Color; + case BlendFactor::Src1Alpha: + return vk::BlendFactor::eSrc1Alpha; + case BlendFactor::InvSrc1Alpha: + return vk::BlendFactor::eOneMinusSrc1Alpha; + case BlendFactor::ConstantAlpha: + return vk::BlendFactor::eConstantAlpha; + case BlendFactor::OneMinusConstantAlpha: + return vk::BlendFactor::eOneMinusConstantAlpha; + default: + UNREACHABLE(); + } +} + +vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { + using BlendFunc = Liverpool::BlendControl::BlendFunc; + switch (func) { + case BlendFunc::Add: + return vk::BlendOp::eAdd; + case BlendFunc::Subtract: + return vk::BlendOp::eSubtract; + case BlendFunc::Min: + return vk::BlendOp::eMin; + case BlendFunc::Max: + return vk::BlendOp::eMax; + default: + UNREACHABLE(); + } +} + +// https://github.com/chaotic-cx/mesa-mirror/blob/0954afff5/src/amd/vulkan/radv_sampler.c#L21 +vk::SamplerAddressMode 
ClampMode(AmdGpu::ClampMode mode) { + switch (mode) { + case AmdGpu::ClampMode::Wrap: + return vk::SamplerAddressMode::eRepeat; + case AmdGpu::ClampMode::Mirror: + return vk::SamplerAddressMode::eMirroredRepeat; + case AmdGpu::ClampMode::ClampLastTexel: + return vk::SamplerAddressMode::eClampToEdge; + case AmdGpu::ClampMode::MirrorOnceLastTexel: + return vk::SamplerAddressMode::eMirrorClampToEdge; + case AmdGpu::ClampMode::ClampBorder: + return vk::SamplerAddressMode::eClampToBorder; + default: + UNREACHABLE(); + } +} + +vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp) { + switch (comp) { + case AmdGpu::DepthCompare::Never: + return vk::CompareOp::eNever; + case AmdGpu::DepthCompare::Less: + return vk::CompareOp::eLess; + case AmdGpu::DepthCompare::Equal: + return vk::CompareOp::eEqual; + case AmdGpu::DepthCompare::LessEqual: + return vk::CompareOp::eLessOrEqual; + case AmdGpu::DepthCompare::Greater: + return vk::CompareOp::eGreater; + case AmdGpu::DepthCompare::NotEqual: + return vk::CompareOp::eNotEqual; + case AmdGpu::DepthCompare::GreaterEqual: + return vk::CompareOp::eGreaterOrEqual; + case AmdGpu::DepthCompare::Always: + return vk::CompareOp::eAlways; + } +} + +vk::Filter Filter(AmdGpu::Filter filter) { + switch (filter) { + case AmdGpu::Filter::Point: + case AmdGpu::Filter::AnisoPoint: + return vk::Filter::eNearest; + case AmdGpu::Filter::Bilinear: + case AmdGpu::Filter::AnisoLinear: + return vk::Filter::eLinear; + default: + UNREACHABLE(); + } +} + +vk::SamplerReductionMode FilterMode(AmdGpu::FilterMode mode) { + switch (mode) { + case AmdGpu::FilterMode::Blend: + return vk::SamplerReductionMode::eWeightedAverage; + case AmdGpu::FilterMode::Min: + return vk::SamplerReductionMode::eMin; + case AmdGpu::FilterMode::Max: + return vk::SamplerReductionMode::eMax; + default: + UNREACHABLE(); + } +} + +vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter) { + switch (filter) { + case AmdGpu::MipFilter::Point: + return vk::SamplerMipmapMode::eNearest; + case AmdGpu::MipFilter::Linear: + return vk::SamplerMipmapMode::eLinear; + case AmdGpu::MipFilter::None: + return vk::SamplerMipmapMode::eNearest; + default: + UNREACHABLE(); + } +} + +vk::BorderColor BorderColor(AmdGpu::BorderColor color) { + switch (color) { + case AmdGpu::BorderColor::OpaqueBlack: + return vk::BorderColor::eFloatOpaqueBlack; + case AmdGpu::BorderColor::TransparentBlack: + return vk::BorderColor::eFloatTransparentBlack; + case AmdGpu::BorderColor::White: + return vk::BorderColor::eFloatOpaqueWhite; + case AmdGpu::BorderColor::Custom: + return vk::BorderColor::eFloatCustomEXT; + default: + UNREACHABLE(); + } +} + vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { @@ -130,11 +289,22 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Srgb) { return vk::Format::eR8G8B8A8Srgb; } + if (data_format == AmdGpu::DataFormat::Format32_32_32 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR32G32B32Sfloat; + } + if (data_format == AmdGpu::DataFormat::Format32_32 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR32G32Sfloat; + } UNREACHABLE(); } -vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, - Liverpool::DepthBuffer::StencilFormat stencil_format) { +vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) { + if 
(z_format == DepthBuffer::ZFormat::Z32Float && + stencil_format == DepthBuffer::StencilFormat::Stencil8) { + return vk::Format::eD32SfloatS8Uint; + } UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 38f021fd..27569c4f 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -5,6 +5,7 @@ #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan::LiverpoolToVK { @@ -21,6 +22,22 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); vk::CullModeFlags CullMode(Liverpool::CullMode mode); +vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor); + +vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func); + +vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode); + +vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp); + +vk::Filter Filter(AmdGpu::Filter filter); + +vk::SamplerReductionMode FilterMode(AmdGpu::FilterMode mode); + +vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter); + +vk::BorderColor BorderColor(AmdGpu::BorderColor color); + vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e68a8f..bede2225 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -10,6 +10,8 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/texture_cache/texture_cache.h" namespace Vulkan { @@ -25,8 +27,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } stages[i] = *infos[i]; } - - desc_layout = BuildSetLayout(); + BuildDescSetLayout(); const vk::DescriptorSetLayout set_layout = *desc_layout; const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1U, @@ -81,20 +82,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .sampleShadingEnable = false, }; - const vk::PipelineColorBlendAttachmentState colorblend_attachment = { - .blendEnable = false, - .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, - }; - - const vk::PipelineColorBlendStateCreateInfo color_blending = { - .logicOpEnable = false, - .logicOp = vk::LogicOp::eCopy, - .attachmentCount = 1, - .pAttachments = &colorblend_attachment, - .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, - }; - const vk::Viewport viewport = { .x = 0.0f, .y = 0.0f, @@ -119,6 +106,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector dynamic_states = { vk::DynamicState::eViewport, vk::DynamicState::eScissor, + vk::DynamicState::eBlendConstants, }; const vk::PipelineDynamicStateCreateInfo dynamic_info = { @@ -174,6 +162,30 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .stencilAttachmentFormat = vk::Format::eUndefined, }; + std::array attachments; + for (u32 i = 0; i < 
num_color_formats; i++) { + const auto& control = key.blend_controls[i]; + attachments[i] = vk::PipelineColorBlendAttachmentState{ + .blendEnable = key.blend_controls[i].enable, + .srcColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_src_factor), + .dstColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor), + .colorBlendOp = LiverpoolToVK::BlendOp(control.color_func), + .srcAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.alpha_src_factor), + .dstAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor), + .alphaBlendOp = LiverpoolToVK::BlendOp(control.alpha_func), + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + }; + } + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = false, + .logicOp = vk::LogicOp::eCopy, + .attachmentCount = num_color_formats, + .pAttachments = attachments.data(), + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, + }; + const vk::GraphicsPipelineCreateInfo pipeline_info = { .pNext = &pipeline_rendering_ci, .stageCount = shader_count, @@ -199,14 +211,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul GraphicsPipeline::~GraphicsPipeline() = default; -vk::UniqueDescriptorSetLayout GraphicsPipeline::BuildSetLayout() const { +void GraphicsPipeline::BuildDescSetLayout() { u32 binding{}; boost::container::small_vector bindings; for (const auto& stage : stages) { for (const auto& buffer : stage.buffers) { bindings.push_back({ .binding = binding++, - .descriptorType = vk::DescriptorType::eStorageBuffer, + .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + }); + } + for (const auto& image : stage.images) { + bindings.push_back({ + .binding = binding++, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + }); + } + for (const auto& sampler : stage.samplers) { + bindings.push_back({ + .binding = binding++, + .descriptorType = vk::DescriptorType::eSampler, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, }); @@ -217,12 +246,24 @@ vk::UniqueDescriptorSetLayout GraphicsPipeline::BuildSetLayout() const { .bindingCount = static_cast(bindings.size()), .pBindings = bindings.data(), }; - return instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); + desc_layout = instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); } -void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { +void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging, + VideoCore::TextureCache& texture_cache) const { + static constexpr u64 MinUniformAlignment = 64; + + const auto map_staging = [&](auto src, size_t size) { + const auto [data, offset, _] = staging.Map(size, MinUniformAlignment); + std::memcpy(data, reinterpret_cast(src), size); + staging.Commit(size); + return offset; + }; + std::array buffers; std::array offsets; + VAddr base_address = 0; + u32 start_offset = 0; // Bind vertex buffer. 
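// (Annotation, not part of the original patch: only the first V#'s range is copied
// into the staging stream buffer; the remaining vertex buffers are bound as offsets
// relative to buffer 0's base_address, which assumes every fetched vertex buffer
// lies inside that first mapped range.)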
const auto& vs_info = stages[0]; @@ -230,38 +271,77 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { for (u32 i = 0; i < num_buffers; ++i) { const auto& input = vs_info.vs_inputs[i]; const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); - std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address); + if (i == 0) { + start_offset = + map_staging(buffer.base_address.Value(), buffer.stride * buffer.num_records); + base_address = buffer.base_address; + } + buffers[i] = staging.Handle(); + offsets[i] = start_offset + buffer.base_address - base_address; } const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); + if (num_buffers > 0) { + cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); + } // Bind resource buffers and textures. boost::container::static_vector buffer_infos; + boost::container::static_vector image_infos; boost::container::small_vector set_writes; u32 binding{}; for (const auto& stage : stages) { for (const auto& buffer : stage.buffers) { const auto vsharp = stage.ReadUd(buffer.sgpr_base, buffer.dword_offset); - const auto [vk_buffer, offset] = memory->GetVulkanBuffer(vsharp.base_address); - buffer_infos.push_back({ - .buffer = vk_buffer, - .offset = offset, - .range = vsharp.stride * vsharp.num_records, - }); + const u32 size = vsharp.stride * vsharp.num_records; + const u32 offset = map_staging(vsharp.base_address.Value(), size); + buffer_infos.emplace_back(staging.Handle(), offset, size); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding, + .dstBinding = binding++, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageBuffer, + .descriptorType = buffer.is_storage ? 
vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); } + + for (const auto& image : stage.images) { + const auto tsharp = stage.ReadUd(image.sgpr_base, image.dword_offset); + const auto& image_view = texture_cache.FindImageView(tsharp); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, + vk::ImageLayout::eGeneral); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .pImageInfo = &image_infos.back(), + }); + } + for (const auto& sampler : stage.samplers) { + const auto ssharp = + stage.ReadUd(sampler.sgpr_base, sampler.dword_offset); + const auto vk_sampler = texture_cache.GetSampler(ssharp); + image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampler, + .pImageInfo = &image_infos.back(), + }); + } } - cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, set_writes); + if (!set_writes.empty()) { + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, + set_writes); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 75bd85ec..95accfbd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -11,6 +11,10 @@ namespace Core { class MemoryManager; } +namespace VideoCore { +class TextureCache; +} + namespace Vulkan { static constexpr u32 MaxVertexBufferCount = 32; @@ -18,6 +22,7 @@ static constexpr u32 MaxShaderStages = 5; class Instance; class Scheduler; +class StreamBuffer; using Liverpool = AmdGpu::Liverpool; @@ -33,6 +38,7 @@ struct PipelineKey { Liverpool::PrimitiveType prim_type; Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; + std::array blend_controls; bool operator==(const PipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(PipelineKey)) == 0; @@ -48,14 +54,15 @@ public: std::array modules); ~GraphicsPipeline(); - void BindResources(Core::MemoryManager* memory) const; + void BindResources(Core::MemoryManager* memory, StreamBuffer& staging, + VideoCore::TextureCache& texture_cache) const; [[nodiscard]] vk::Pipeline Handle() const noexcept { return *pipeline; } private: - vk::UniqueDescriptorSetLayout BuildSetLayout() const; + void BuildDescSetLayout(); private: const Instance& instance; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 7419461c..da88ba4c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -151,6 +151,7 @@ bool Instance::CreateDevice() { custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); + add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -213,6 +214,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{ .indexTypeUint8 = true, }, + vk::PhysicalDeviceMaintenance4Features{ + 
.maintenance4 = true, + }, }; if (!index_type_uint8) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d917f4e4..14b5227e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,11 +1,13 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include +#include +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" -#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -41,6 +43,9 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); + profile = Shader::Profile{ + .supported_spirv = 0x00010600U, + }; } const GraphicsPipeline* PipelineCache::GetPipeline() { @@ -63,6 +68,7 @@ void PipelineCache::RefreshKey() { key.stencil_ref_back = regs.stencil_ref_back; key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); + key.blend_controls = regs.blend_control; const auto& db = regs.depth_buffer; key.depth_format = key.depth.depth_enable @@ -81,17 +87,15 @@ void PipelineCache::RefreshKey() { key.stage_hashes[i] = 0; continue; } - const u32* code = pgm->Address(); - - Shader::BinaryInfo bininfo; - std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); - key.stage_hashes[i] = bininfo.shader_hash; + const auto code = pgm->Code(); + key.stage_hashes[i] = XXH3_64bits(code.data(), code.size_bytes()); } } std::unique_ptr PipelineCache::CreatePipeline() { const auto& regs = liverpool->regs; + u32 binding{}; std::array programs; std::array infos{}; @@ -101,40 +105,49 @@ std::unique_ptr PipelineCache::CreatePipeline() { continue; } auto* pgm = regs.ProgramForStage(i); - const u32* code = pgm->Address(); + const auto code = pgm->Code(); - Shader::BinaryInfo bininfo; - std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); - const u32 num_dwords = bininfo.length / sizeof(u32); - - const auto it = module_map.find(bininfo.shader_hash); + const auto it = module_map.find(graphics_key.stage_hashes[i]); if (it != module_map.end()) { stages[i] = *it->second; continue; } + // Dump shader code if requested. + const auto stage = Shader::Stage{i}; + const u64 hash = graphics_key.stage_hashes[i]; + if (Config::dumpShaders()) { + DumpShader(code, hash, stage, "bin"); + } + block_pool.ReleaseContents(); inst_pool.ReleaseContents(); // Recompile shader to IR. 
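// (Annotation, not part of the original patch: per stage the flow is now GCN code ->
// IR via TranslateProgram() -> SPIR-V via EmitSPIRV(), with the shared `binding`
// counter apparently threaded through so descriptor bindings stay sequential across
// stages and line up with BuildDescSetLayout(), then vk::ShaderModule via
// CompileSPV(); the .bin and .spv dumps are written only when dumpShaders is set.)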
- const auto stage = Shader::Stage{i}; const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); - programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords}, - std::move(info)); + programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); // Compile IR to SPIR-V - const auto profile = Shader::Profile{.supported_spirv = 0x00010600U}; - const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i]); - std::ofstream file("shader0.spv", std::ios::out | std::ios::binary); - file.write((const char*)spv_code.data(), spv_code.size() * 4); - file.close(); - + const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding); stages[i] = CompileSPV(spv_code, instance.GetDevice()); infos[i] = &programs[i].info; + + if (Config::dumpShaders()) { + DumpShader(spv_code, hash, stage, "spv"); + } } return std::make_unique(instance, scheduler, graphics_key, *pipeline_cache, infos, stages); } +void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, + std::string_view ext) { + using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + const auto filename = fmt::format("{}_{:#X}.{}", stage, hash, ext); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteSpan(code); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 32830eab..514adcbb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" namespace Shader { @@ -32,6 +33,8 @@ private: std::unique_ptr CreatePipeline(); + void DumpShader(std::span code, u64 hash, Shader::Stage stage, std::string_view ext); + private: const Instance& instance; Scheduler& scheduler; @@ -41,6 +44,7 @@ private: tsl::robin_map module_map; std::array stages{}; tsl::robin_map> graphics_pipelines; + Shader::Profile profile{}; PipelineKey graphics_key{}; Shader::ObjectPool inst_pool; Shader::ObjectPool block_pool; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a7483c27..cd52d796 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -12,16 +12,17 @@ namespace Vulkan { -static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eTransferDst; +static constexpr vk::BufferUsageFlags VertexIndexFlags = + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eUniformBuffer | + vk::BufferUsageFlagBits::eStorageBuffer; Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool}, - vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { + vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} { if 
(!Config::nullGpu()) { liverpool->BindRasterizer(this); } @@ -35,9 +36,10 @@ void Rasterizer::Draw(bool is_indexed) { const auto cmdbuf = scheduler.CommandBuffer(); const auto& regs = liverpool->regs; const u32 num_indices = SetupIndexBuffer(is_indexed); - const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline(); - pipeline->BindResources(memory); + pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + + const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); const vk::RenderingAttachmentInfo color_info = { .imageView = *image_view.image_view, @@ -88,18 +90,30 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { return regs.num_indices; } - const VAddr index_address = regs.index_base_address.Address(); - const auto [buffer, offset] = memory->GetVulkanBuffer(index_address); - const vk::IndexType index_type = - regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16 - : vk::IndexType::eUint32; + // Figure out index type and size. + const bool is_index16 = regs.index_buffer_type.index_type == Liverpool::IndexType::Index16; + const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; + const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); + + // Upload index data to stream buffer. + const auto index_address = regs.index_base_address.Address(); + const u32 index_buffer_size = regs.num_indices * 4; + const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size); + std::memcpy(data, index_address, index_buffer_size); + vertex_index_buffer.Commit(index_buffer_size); + + // Bind index buffer. const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(buffer, offset, index_type); + cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, index_type); return regs.num_indices; } void Rasterizer::UpdateDynamicState() { UpdateViewportScissorState(); + + auto& regs = liverpool->regs; + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.setBlendConstants(®s.blend_constants.red); } void Rasterizer::UpdateViewportScissorState() { diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 6a7bba8e..efe3cf3d 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -44,6 +44,22 @@ using Libraries::VideoOut::TilingMode; return usage; } +[[nodiscard]] vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { + switch (type) { + case AmdGpu::ImageType::Color1D: + return vk::ImageType::e1D; + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color1DArray: + case AmdGpu::ImageType::Cube: + return vk::ImageType::e2D; + case AmdGpu::ImageType::Color3D: + case AmdGpu::ImageType::Color2DArray: + return vk::ImageType::e3D; + default: + UNREACHABLE(); + } +} + ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept { const auto& attrib = group.attrib; is_tiled = attrib.tiling_mode == TilingMode::Tile; @@ -72,10 +88,23 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { type = vk::ImageType::e2D; size.width = buffer.Pitch(); size.height = buffer.Height(); + size.depth = 1; pitch = size.width; guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1); } +ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { + is_tiled = false; + pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); 
+ type = ConvertImageType(image.type); + size.width = image.width + 1; + size.height = image.height + 1; + size.depth = 1; + // TODO: Derive this properly from tiling params + pitch = size.width; + guest_size_bytes = size.width * size.height * 4; +} + UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) : device{device_}, allocator{allocator_} {} @@ -109,7 +138,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { - vk::ImageCreateFlags flags{}; + vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat}; if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 && info.size.width == info.size.height) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 92391fde..1f29d0d4 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -7,6 +7,7 @@ #include "common/types.h" #include "core/libraries/videoout/buffer.h" #include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/types.h" @@ -34,6 +35,7 @@ struct ImageInfo { ImageInfo() = default; explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept; + explicit ImageInfo(const AmdGpu::Image& image) noexcept; bool is_tiled = false; vk::Format pixel_format = vk::Format::eUndefined; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 59a97447..8e63e039 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -1,11 +1,62 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/texture_cache/image_view.h" namespace VideoCore { +vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { + switch (type) { + case AmdGpu::ImageType::Color1D: + return vk::ImageViewType::e1D; + case AmdGpu::ImageType::Color1DArray: + return vk::ImageViewType::e1DArray; + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Cube: + return vk::ImageViewType::e2D; + case AmdGpu::ImageType::Color2DArray: + return vk::ImageViewType::e2DArray; + case AmdGpu::ImageType::Color3D: + return vk::ImageViewType::e3D; + default: + UNREACHABLE(); + } +} + +vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { + switch (dst_sel) { + case 0: + return vk::ComponentSwizzle::eZero; + case 1: + return vk::ComponentSwizzle::eOne; + case 4: + return vk::ComponentSwizzle::eR; + case 5: + return vk::ComponentSwizzle::eG; + case 6: + return vk::ComponentSwizzle::eB; + case 7: + return vk::ComponentSwizzle::eA; + default: + UNREACHABLE(); + } +} + +ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept { + type = ConvertImageViewType(image.type); + format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); + range.base.level = image.base_level; + range.base.layer = 0; + range.extent.levels = 1; + range.extent.layers = 1; 
+ mapping.r = ConvertComponentSwizzle(image.dst_sel_x); + mapping.g = ConvertComponentSwizzle(image.dst_sel_y); + mapping.b = ConvertComponentSwizzle(image.dst_sel_z); + mapping.a = ConvertComponentSwizzle(image.dst_sel_w); +} + ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, const ImageViewInfo& info_, vk::Image image) : info{info_} { diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index d84a963e..7f98e8ec 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -3,6 +3,7 @@ #pragma once +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/types.h" @@ -14,6 +15,9 @@ class Scheduler; namespace VideoCore { struct ImageViewInfo { + explicit ImageViewInfo() = default; + explicit ImageViewInfo(const AmdGpu::Image& image) noexcept; + vk::ImageViewType type = vk::ImageViewType::e2D; vk::Format format = vk::Format::eR8G8B8A8Unorm; SubresourceRange range; diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp new file mode 100644 index 00000000..19c92712 --- /dev/null +++ b/src/video_core/texture_cache/sampler.cpp @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/renderer_vulkan/liverpool_to_vk.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/texture_cache/sampler.h" + +namespace VideoCore { + +Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler) { + using namespace Vulkan; + const vk::SamplerCreateInfo sampler_ci = { + .magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter), + .minFilter = LiverpoolToVK::Filter(sampler.xy_min_filter), + .mipmapMode = LiverpoolToVK::MipFilter(sampler.mip_filter), + .addressModeU = LiverpoolToVK::ClampMode(sampler.clamp_x), + .addressModeV = LiverpoolToVK::ClampMode(sampler.clamp_y), + .addressModeW = LiverpoolToVK::ClampMode(sampler.clamp_z), + .mipLodBias = sampler.LodBias(), + .compareEnable = sampler.depth_compare_func != AmdGpu::DepthCompare::Never, + .compareOp = LiverpoolToVK::DepthCompare(sampler.depth_compare_func), + .minLod = sampler.MinLod(), + .maxLod = sampler.MaxLod(), + .borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type), + .unnormalizedCoordinates = bool(sampler.force_unnormalized), + }; + handle = instance.GetDevice().createSamplerUnique(sampler_ci); +} + +Sampler::~Sampler() = default; + +} // namespace VideoCore diff --git a/src/video_core/texture_cache/sampler.h b/src/video_core/texture_cache/sampler.h new file mode 100644 index 00000000..856d3976 --- /dev/null +++ b/src/video_core/texture_cache/sampler.h @@ -0,0 +1,34 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/amdgpu/resource.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { +class Instance; +} + +namespace VideoCore { + +class Sampler { +public: + explicit Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler); + ~Sampler(); + + Sampler(const Sampler&) = delete; + Sampler& operator=(const Sampler&) = delete; + + Sampler(Sampler&&) = default; + Sampler& operator=(Sampler&&) = default; + + vk::Sampler Handle() const noexcept { + return *handle; + } + +private: + vk::UniqueSampler handle; +}; + +} // namespace 
VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index e21bb6ed..35bbb3f7 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,10 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include +#include #include "common/assert.h" #include "common/config.h" -#include "core/libraries/videoout/buffer.h" #include "core/virtual_memory.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" @@ -137,6 +136,21 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { return image; } +ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) { + Image& image = FindImage(ImageInfo{desc}, desc.Address()); + + const ImageViewInfo view_info{desc}; + if (const ImageViewId view_id = image.FindView(view_info); view_id) { + return slot_image_views[view_id]; + } + + const ImageViewId view_id = + slot_image_views.insert(instance, scheduler, view_info, image.image); + image.image_view_infos.emplace_back(view_info); + image.image_view_ids.emplace_back(view_id); + return slot_image_views[view_id]; +} + ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) { const ImageInfo info{buffer}; auto& image = FindImage(info, buffer.Address()); @@ -159,7 +173,7 @@ void TextureCache::RefreshImage(Image& image) { image.flags &= ~ImageFlagBits::CpuModified; // Upload data to the staging buffer. - const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 0); + const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4); const u8* image_data = reinterpret_cast(image.cpu_addr); if (image.info.is_tiled) { ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height, @@ -202,6 +216,12 @@ void TextureCache::RefreshImage(Image& image) { vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); } +vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) { + const u64 hash = XXH3_64bits(&sampler, sizeof(sampler)); + const auto [it, new_sampler] = samplers.try_emplace(hash, instance, sampler); + return it->second.Handle(); +} + void TextureCache::RegisterImage(ImageId image_id) { Image& image = slot_images[image_id]; ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f59f16c4..a09d1194 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -7,9 +7,11 @@ #include #include +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image_view.h" +#include "video_core/texture_cache/sampler.h" #include "video_core/texture_cache/slot_vector.h" namespace Core::Libraries::VideoOut { @@ -36,12 +38,18 @@ public: /// Retrieves the image handle of the image with the provided attributes and address. Image& FindImage(const ImageInfo& info, VAddr cpu_address); + /// Retrieves an image view with the properties of the specified image descriptor. + ImageView& FindImageView(const AmdGpu::Image& image); + /// Retrieves the render target with specified properties ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer); /// Reuploads image contents. 
void RefreshImage(Image& image); + /// Retrieves the sampler that matches the provided S# descriptor. + vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); + private: /// Iterate over all page indices in a range template @@ -121,6 +129,7 @@ private: Vulkan::StreamBuffer staging; SlotVector slot_images; SlotVector slot_image_views; + tsl::robin_map samplers; tsl::robin_pg_map> page_table; boost::icl::interval_map cached_pages; #ifdef _WIN64 diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 8cd21640..7d961921 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/assert.h" #include "video_core/texture_cache/tile_manager.h" namespace VideoCore { From 4d728e943d6df4ed0de5234a0cf3f082db2fd1e4 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 27 May 2024 22:12:49 +0300 Subject: [PATCH 2/2] video_core: Address some feedback --- src/core/libraries/gnmdriver/gnmdriver.cpp | 32 +++++++++---------- .../backend/spirv/spirv_emit_context.cpp | 4 +-- src/video_core/amdgpu/liverpool.h | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++ .../renderer_vulkan/vk_graphics_pipeline.h | 5 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 3 ++ .../renderer_vulkan/vk_rasterizer.cpp | 7 ++-- 8 files changed, 34 insertions(+), 24 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 67558945..a2358955 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -888,29 +888,27 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, // A fullscreen triangle with one uv set // clang-format off constexpr static std::array shader_code alignas(256) = { - 0xbeeb03ffu, 0x00000009u, // s_mov_b32 vcc_hi, lit(9) - 0x36020081u, // v_and_b32 v1, 1, v0 - 0x36000082u, // v_and_b32 v0, 2, v0 - 0x7e000d00u, // v_cvt_f32_u32 v0, v0 - 0x7e040d01u, // v_cvt_f32_u32 v2, v1 - 0xd2820003u, 0x3ce00f4u, // v_mad_f32 v3, 2.0, v0, -1.0 - 0xd2820004u, 0x3ce04f6u, // v_mad_f32 v4, 4.0, v2, -1.0 - 0x7e020280u, // v_mov_b32 v1, 0 - 0x7e0a02f2u, // v_mov_b32 v5, 1.0 - 0xf80008cfu, 0x5010403u, // exp pos0, v3, v4, v1, v5 done - 0x100404f4u, // v_mul_f32 v2, 2.0, v2 - 0xf800020fu, 0x1010200u, // exp param0, v0, v2, v1, v1 - 0xbf810000u, // s_endpgm - 0x302u, - 0x46d611cu, + 0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007 + 0x36020081u, // v_and_b32 v1, 1, v0 + 0x34020281u, // v_lshlrev_b32 v1, 1, v1 + 0x360000c2u, // v_and_b32 v0, -2, v0 + 0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1 + 0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0 + 0x7e020b01u, // v_cvt_f32_i32 v1, v1 + 0x7E000B00U, + 0x7e040280u, // v_cvt_f32_i32 v0, v0 + 0x7e0602f2u, // v_mov_b32 v3, 1.0 + 0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done + 0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3 + 0xbf810000u, // s_endpgm // OrbShdr header - 0x5362724fu, 0x7726468u, 0x4845u, 0x5080002u, 0xd1e7de61u, 0x0u, 0xb9cae598u, + 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, + 0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u, }; // clang-format on const auto shader_addr = uintptr_t(shader_code.data()); // Original address is 0xfe000f10 - ASSERT((shader_addr & 0xFF) == 0); 
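// (Annotation, not part of the original patch: vs_regs below packs the code address
// as u32(shader_addr >> 8) and u32(shader_addr >> 40), i.e. the low 8 bits are
// dropped by the register encoding, which is exactly the 256-byte-alignment
// condition the removed ASSERT used to check.)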
const static u32 vs_regs[] = { u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7}; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4b9e696e..a2a0fc9a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -237,9 +237,9 @@ void EmitContext::DefineBuffers(const Info& info) { ASSERT(buffer.stride % sizeof(float) == 0); const u32 num_elements = buffer.stride * buffer.num_records / sizeof(float); const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))}; - Decorate(record_array_type, spv::Decoration::ArrayStride, sizeof(float)); - const Id struct_type{TypeStruct(record_array_type)}; + Decorate(record_array_type, spv::Decoration::ArrayStride, 4); + const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); Name(struct_type, name); Decorate(struct_type, spv::Decoration::Block); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9033fbab..1ddf4fc9 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -65,7 +65,7 @@ struct Liverpool { } std::span Code() const { - u32 code_size = 1; + u32 code_size = 0; const u32* code = Address(); static constexpr std::string_view PostHeader = "OrbShdr"; while (std::memcmp(code + code_size, PostHeader.data(), PostHeader.size()) != 0) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 315015cc..b15760ba 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -80,7 +80,7 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { // Needs to generate index buffer on the fly. 
return vk::PrimitiveTopology::eTriangleList; case Liverpool::PrimitiveType::RectList: - return vk::PrimitiveTopology::eTriangleList; + return vk::PrimitiveTopology::eTriangleStrip; default: UNREACHABLE(); return vk::PrimitiveTopology::eTriangleList; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index bede2225..1815224c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -62,6 +62,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pVertexAttributeDescriptions = attributes.data(), }; + ASSERT_MSG(key.prim_type != Liverpool::PrimitiveType::RectList || IsEmbeddedVs(), + "Rectangle List primitive type is only supported for embedded VS"); + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = LiverpoolToVK::PrimitiveType(key.prim_type), .primitiveRestartEnable = false, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 95accfbd..fc8b4fa1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -61,6 +61,11 @@ public: return *pipeline; } + [[nodiscard]] bool IsEmbeddedVs() const noexcept { + static constexpr size_t EmbeddedVsHash = 0x59c556606a027efd; + return key.stage_hashes[0] == EmbeddedVsHash; + } + private: void BuildDescSetLayout(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 14b5227e..54f81267 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -145,6 +145,9 @@ void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stag std::string_view ext) { using namespace Common::FS; const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } const auto filename = fmt::format("{}_{:#X}.{}", stage, hash, ext); const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; file.WriteSpan(code); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index cd52d796..aea93487 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -22,7 +22,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool}, - vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} { + vertex_index_buffer{instance, scheduler, VertexIndexFlags, 32_MB} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } @@ -63,7 +63,8 @@ void Rasterizer::Draw(bool is_indexed) { if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { - cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), 0, 0); + const u32 num_vertices = pipeline->IsEmbeddedVs() ? 4 : regs.num_indices; + cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0); } cmdbuf.endRendering(); } @@ -97,7 +98,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { // Upload index data to stream buffer. 
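// (Annotation, not part of the original patch: the fix below sizes the guest copy by
// the actual index type; using num_indices * 4 reads and uploads twice the needed
// bytes for 16-bit index draws.)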
const auto index_address = regs.index_base_address.Address(); - const u32 index_buffer_size = regs.num_indices * 4; + const u32 index_buffer_size = regs.num_indices * index_size; const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size); std::memcpy(data, index_address, index_buffer_size); vertex_index_buffer.Commit(index_buffer_size);
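Taken together, the image half of GraphicsPipeline::BindResources() reduces to the short
sketch below. It is illustrative only and not part of the patch: the element types of
stage.images / stage.samplers are left as template parameters because their declarations
live outside this diff, ReadUd() is assumed to be templated on the descriptor type it
returns, and the static_vector of descriptor infos from the diff is replaced by a plain
std::vector; the project headers already pulled in above are assumed available.

// Minimal sketch (assumptions noted above) of resolving one T#/S# pair into
// descriptor-image infos, mirroring the BindResources() additions in this patch.
template <typename ImageRes, typename SamplerRes>
static void ResolveTexturePair(const Shader::Info& stage, const ImageRes& image_desc,
                               const SamplerRes& sampler_desc,
                               VideoCore::TextureCache& texture_cache,
                               std::vector<vk::DescriptorImageInfo>& image_infos) {
    // T# and S# descriptors are read straight out of shader user data.
    const auto tsharp = stage.ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
    const auto ssharp =
        stage.ReadUd<AmdGpu::Sampler>(sampler_desc.sgpr_base, sampler_desc.dword_offset);
    // FindImageView() creates or reuses a cached Image/ImageView for the T#;
    // GetSampler() deduplicates vk::Sampler objects by hashing the raw S# words.
    const auto& view = texture_cache.FindImageView(tsharp);
    const vk::Sampler vk_sampler = texture_cache.GetSampler(ssharp);
    // BindResources() records these as separate eSampledImage / eSampler
    // push-descriptor writes consumed by cmdbuf.pushDescriptorSetKHR().
    image_infos.emplace_back(VK_NULL_HANDLE, *view.image_view, vk::ImageLayout::eGeneral);
    image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
}

Keeping the image and sampler as separate eSampledImage / eSampler descriptors, rather
than a combined image sampler, mirrors how the T# and S# arrive as separate user-data
descriptors on the GCN side.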