From 3c90b8ac0089234630582416e0a6fb13478459fa Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 25 May 2024 15:33:15 +0300 Subject: [PATCH] video_core: Bringup some basic functionality (#145) * video_core: Remove hack in rasterizer * The hack was to skip the first draw as the display buffer had not been created yet and the texture cache couldn't create one itself. With this patch it now can, using the color buffer parameters from registers * shader_recompiler: Implement attribute loads/stores * video_core: Add basic vertex, index buffer handling and pipeline caching * externals: Make xxhash lowercase --- .gitmodules | 6 +- CMakeLists.txt | 3 + externals/CMakeLists.txt | 6 +- externals/{xxHash => xxhash} | 0 src/core/memory.cpp | 93 +++++++++++ src/core/memory.h | 28 +++- src/main.cpp | 1 - .../backend/spirv/emit_spirv.cpp | 6 +- .../spirv/emit_spirv_context_get_set.cpp | 17 +- .../backend/spirv/emit_spirv_instructions.h | 6 +- .../backend/spirv/spirv_emit_context.cpp | 109 ++++++++++++- .../backend/spirv/spirv_emit_context.h | 18 +-- .../frontend/fetch_shader.cpp | 83 ++++++++++ src/shader_recompiler/frontend/fetch_shader.h | 21 +++ .../frontend/structured_control_flow.cpp | 13 +- .../frontend/structured_control_flow.h | 4 +- .../frontend/translate/translate.cpp | 41 ++++- .../frontend/translate/translate.h | 10 +- .../frontend/translate/vector_alu.cpp | 5 +- .../translate/vector_interpolation.cpp | 4 +- src/shader_recompiler/ir/attribute.cpp | 4 + src/shader_recompiler/ir/attribute.h | 8 +- src/shader_recompiler/ir/ir_emitter.cpp | 8 - src/shader_recompiler/ir/ir_emitter.h | 8 +- .../ir/passes/info_collection.cpp | 33 ++++ src/shader_recompiler/ir/passes/passes.h | 4 +- .../ir/passes/resource_tracking_pass.cpp | 5 +- src/shader_recompiler/ir/program.h | 8 +- src/shader_recompiler/recompiler.cpp | 26 +-- src/shader_recompiler/recompiler.h | 8 +- src/shader_recompiler/runtime_info.h | 153 +++++++----------- src/video_core/amdgpu/liverpool.cpp | 6 +- src/video_core/amdgpu/liverpool.h | 149 +++++++++-------- src/video_core/amdgpu/pixel_format.cpp | 36 ++++- src/video_core/amdgpu/pixel_format.h | 16 +- .../renderer_vulkan/liverpool_to_vk.cpp | 41 +++++ .../renderer_vulkan/liverpool_to_vk.h | 8 + .../renderer_vulkan/renderer_vulkan.cpp | 2 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 77 +++++++-- .../renderer_vulkan/vk_graphics_pipeline.h | 35 +++- .../renderer_vulkan/vk_instance.cpp | 10 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 139 +++++++++++----- .../renderer_vulkan/vk_pipeline_cache.h | 13 +- .../renderer_vulkan/vk_rasterizer.cpp | 72 ++++++--- .../renderer_vulkan/vk_rasterizer.h | 19 +-- .../renderer_vulkan/vk_stream_buffer.h | 2 +- src/video_core/texture_cache/image.cpp | 11 ++ src/video_core/texture_cache/image.h | 2 + .../texture_cache/texture_cache.cpp | 34 ++-- src/video_core/texture_cache/texture_cache.h | 2 +- 50 files changed, 1030 insertions(+), 383 deletions(-) rename externals/{xxHash => xxhash} (100%) create mode 100644 src/shader_recompiler/frontend/fetch_shader.cpp create mode 100644 src/shader_recompiler/frontend/fetch_shader.h create mode 100644 src/shader_recompiler/ir/passes/info_collection.cpp diff --git a/.gitmodules b/.gitmodules index 76ca5dca..385021a8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,12 +50,12 @@ [submodule "externals/toml11"] path = externals/toml11 url = https://github.com/ToruNiina/toml11.git -[submodule "externals/xxHash"] - path = externals/xxHash - url = https://github.com/Cyan4973/xxHash.git [submodule "externals/zydis"] path = externals/zydis url = https://github.com/zyantific/zydis.git [submodule "externals/sirit"] path = externals/sirit url = https://github.com/raphaelthegreat/sirit +[submodule "externals/xxhash"] + path = externals/xxhash + url = https://github.com/Cyan4973/xxHash.git diff --git a/CMakeLists.txt b/CMakeLists.txt index dae1e4f2..150c1001 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -324,6 +324,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/frontend/control_flow_graph.h src/shader_recompiler/frontend/decode.cpp src/shader_recompiler/frontend/decode.h + src/shader_recompiler/frontend/fetch_shader.cpp + src/shader_recompiler/frontend/fetch_shader.h src/shader_recompiler/frontend/format.cpp src/shader_recompiler/frontend/instruction.cpp src/shader_recompiler/frontend/instruction.h @@ -333,6 +335,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/constant_propogation_pass.cpp + src/shader_recompiler/ir/passes/info_collection.cpp src/shader_recompiler/ir/passes/passes.h src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 24bca2f1..b2d348b7 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL) add_subdirectory(toml11 EXCLUDE_FROM_ALL) # xxHash -add_library(xxhash INTERFACE) -target_include_directories(xxhash INTERFACE xxhash) +add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c) +target_include_directories(xxhash PUBLIC xxhash) # Zydis option(ZYDIS_BUILD_TOOLS "" OFF) @@ -92,4 +92,4 @@ endif() add_subdirectory(sirit EXCLUDE_FROM_ALL) if (WIN32) target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") -endif() \ No newline at end of file +endif() diff --git a/externals/xxHash b/externals/xxhash similarity index 100% rename from externals/xxHash rename to externals/xxhash diff --git a/src/core/memory.cpp b/src/core/memory.cpp index aa5c6676..a16abbe0 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -7,6 +7,7 @@ #include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/memory.h" +#include "video_core/renderer_vulkan/vk_instance.h" namespace Core { @@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.prot = prot; new_vma.name = name; new_vma.type = type; + + if (type == VMAType::Direct) { + MapVulkanMemory(mapped_addr, size); + } }; // When virtual addr is zero let the address space manager pick the address. @@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, "Attempting to unmap partially mapped range"); + if (it->second.type == VMAType::Direct) { + UnmapVulkanMemory(virtual_addr, size); + } + // Mark region as free and attempt to coalesce it with neighbours. auto& vma = it->second; vma.type = VMAType::Free; @@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { impl.Unmap(virtual_addr, size); } +std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { + auto it = mapped_memories.upper_bound(addr); + it = std::prev(it); + ASSERT(it != mapped_memories.end() && it->first <= addr); + return std::make_pair(*it->second.buffer, addr - it->first); +} + VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); @@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) { return iter; } +void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { + const vk::Device device = instance->GetDevice(); + const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties(); + void* host_pointer = reinterpret_cast(addr); + const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT( + vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer); + ASSERT(host_mem_props.memoryTypeBits != 0); + + int mapped_memory_type = -1; + auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) { + u32 host_mem_types = host_mem_props.memoryTypeBits; + while (host_mem_types != 0) { + // Try to find a cached memory type + mapped_memory_type = std::countr_zero(host_mem_types); + host_mem_types -= (1 << mapped_memory_type); + + if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) { + return; + } + } + + mapped_memory_type = -1; + }; + + // First try to find a memory that is both coherent and cached + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eHostCached); + if (mapped_memory_type == -1) + // Then only coherent (lower performance) + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent); + + if (mapped_memory_type == -1) { + LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping"); + mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits); + } + + const vk::StructureChain alloc_info = { + vk::MemoryAllocateInfo{ + .allocationSize = size, + .memoryTypeIndex = static_cast(mapped_memory_type), + }, + vk::ImportMemoryHostPointerInfoEXT{ + .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + .pHostPointer = host_pointer, + }, + }; + + const auto [it, new_memory] = mapped_memories.try_emplace(addr); + ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory"); + + auto& memory = it->second; + memory.backing = device.allocateMemoryUnique(alloc_info.get()); + + constexpr vk::BufferUsageFlags MapFlags = + vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eUniformBuffer; + + const vk::StructureChain buffer_info = { + vk::BufferCreateInfo{ + .size = size, + .usage = MapFlags, + .sharingMode = vk::SharingMode::eExclusive, + }, + vk::ExternalMemoryBufferCreateInfoKHR{ + .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + }}; + memory.buffer = device.createBufferUnique(buffer_info.get()); + device.bindBufferMemory(*memory.buffer, *memory.backing, 0); +} + +void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) { + const auto it = mapped_memories.find(addr); + ASSERT(it != mapped_memories.end() && it->second.buffer_size == size); + mapped_memories.erase(it); +} + } // namespace Core diff --git a/src/core/memory.h b/src/core/memory.h index a86930c8..4c0fadbf 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -10,6 +11,11 @@ #include "common/singleton.h" #include "common/types.h" #include "core/address_space.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { +class Instance; +} namespace Core { @@ -86,6 +92,10 @@ public: explicit MemoryManager(); ~MemoryManager(); + void SetInstance(const Vulkan::Instance* instance_) { + instance = instance_; + } + PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, int memory_type); @@ -97,11 +107,9 @@ public: void UnmapMemory(VAddr virtual_addr, size_t size); -private: - bool HasOverlap(VAddr addr, size_t size) const { - return vma_map.find(addr) != vma_map.end(); - } + std::pair GetVulkanBuffer(VAddr addr); +private: VMAHandle FindVMA(VAddr target) { // Return first the VMA with base >= target. const auto it = vma_map.lower_bound(target); @@ -117,10 +125,22 @@ private: VMAHandle MergeAdjacent(VMAHandle iter); + void MapVulkanMemory(VAddr addr, size_t size); + + void UnmapVulkanMemory(VAddr addr, size_t size); + private: AddressSpace impl; std::vector allocations; VMAMap vma_map; + + struct MappedMemory { + vk::UniqueBuffer buffer; + vk::UniqueDeviceMemory backing; + size_t buffer_size; + }; + std::map mapped_memories; + const Vulkan::Instance* instance{}; }; using Memory = Common::Singleton; diff --git a/src/main.cpp b/src/main.cpp index 2a7f839e..43196046 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -20,7 +20,6 @@ #include "core/libraries/libs.h" #include "core/libraries/videoout/video_out.h" #include "core/linker.h" -#include "core/tls.h" #include "input/controller.h" #include "sdl_window.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index f341d465..fb9c67d6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -171,7 +171,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.stage) { + switch (program.info.stage) { case Stage::Compute: { // const std::array workgroup_size{program.workgroup_size}; // execution_model = spv::ExecutionModel::GLCompute; @@ -194,7 +194,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { // } break; default: - throw NotImplementedException("Stage {}", u32(program.stage)); + throw NotImplementedException("Stage {}", u32(program.info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -222,7 +222,7 @@ std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Binding EmitContext ctx{profile, program, bindings}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); - if (program.stage == Stage::Vertex) { + if (program.info.stage == Stage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 0ba72f2a..2ed4e29d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -10,12 +10,11 @@ namespace { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index).at(element)}; + const auto& info{ctx.output_params.at(index)}; if (info.num_components == 1) { return info.id; } else { - const u32 index_element{element - info.first_element}; - return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element)); + return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element)); } } switch (attr) { @@ -68,22 +67,21 @@ Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR:: throw LogicError("Unreachable instruction"); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { - const u32 element{static_cast(attr) % 4}; +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(index)}; if (!ValidId(param.id)) { // Attribute is disabled or varying component is not written - return ctx.ConstF32(element == 3 ? 1.0f : 0.0f); + return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); } - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))}; + const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; return ctx.OpLoad(param.component_type, pointer); } throw NotImplementedException("Read attribute {}", attr); } -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { switch (attr) { case IR::Attribute::VertexId: return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); @@ -93,9 +91,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { - if (attr == IR::Attribute::Param0) { - return; - } const Id pointer{OutputAttrPointer(ctx, attr, element)}; ctx.OpStore(pointer, value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 20d58e90..24685275 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -46,9 +46,9 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu const IR::Value& offset); Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index e9a55766..771e46d4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -36,7 +36,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} { u32& uniform_binding{bindings.unified}; u32& storage_binding{bindings.unified}; u32& texture_binding{bindings.unified}; @@ -98,6 +98,10 @@ void EmitContext::DefineArithmeticTypes() { u32_zero_value = ConstU32(0U); f32_zero_value = ConstF32(0.0f); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); } @@ -107,26 +111,123 @@ void EmitContext::DefineInterfaces(const IR::Program& program) { DefineOutputs(program); } +Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { + switch (fmt) { + case AmdGpu::NumberFormat::Float: + case AmdGpu::NumberFormat::Unorm: + return ctx.F32[4]; + case AmdGpu::NumberFormat::Sint: + return ctx.S32[4]; + case AmdGpu::NumberFormat::Uint: + return ctx.U32[4]; + case AmdGpu::NumberFormat::Sscaled: + return ctx.F32[4]; + case AmdGpu::NumberFormat::Uscaled: + return ctx.F32[4]; + default: + break; + } + throw InvalidArgument("Invalid attribute type {}", fmt); +} + +EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) { + switch (fmt) { + case AmdGpu::NumberFormat::Float: + case AmdGpu::NumberFormat::Unorm: + return {id, input_f32, F32[1], 4}; + case AmdGpu::NumberFormat::Uint: + return {id, input_u32, U32[1], 4}; + case AmdGpu::NumberFormat::Sint: + return {id, input_s32, S32[1], 4}; + case AmdGpu::NumberFormat::Sscaled: + return {id, input_f32, F32[1], 4}; + case AmdGpu::NumberFormat::Uscaled: + return {id, input_f32, F32[1], 4}; + default: + break; + } + throw InvalidArgument("Invalid attribute type {}", fmt); +} + +Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { + switch (default_value) { + case 0: + return ctx.ConstF32(0.f, 0.f, 0.f, 0.f); + case 1: + return ctx.ConstF32(0.f, 0.f, 0.f, 1.f); + case 2: + return ctx.ConstF32(1.f, 1.f, 1.f, 0.f); + case 3: + return ctx.ConstF32(1.f, 1.f, 1.f, 1.f); + default: + UNREACHABLE(); + } +} + void EmitContext::DefineInputs(const IR::Program& program) { + const auto& info = program.info; switch (stage) { case Stage::Vertex: vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); + for (const auto& input : info.vs_inputs) { + const Id type{GetAttributeType(*this, input.fmt)}; + const Id id{DefineInput(type, input.binding)}; + Name(id, fmt::format("vs_in_attr{}", input.binding)); + input_params[input.binding] = GetAttributeInfo(input.fmt, id); + interfaces.push_back(id); + } break; + case Stage::Fragment: + for (const auto& input : info.ps_inputs) { + if (input.is_default) { + input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), + input_f32, F32[1]}; + continue; + } + const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; + const u32 num_components = info.loads.NumComponents(param); + const Id type{F32[num_components]}; + const Id id{DefineInput(type, input.semantic)}; + if (input.is_flat) { + Decorate(id, spv::Decoration::Flat); + } + Name(id, fmt::format("fs_in_attr{}", input.semantic)); + input_params[input.semantic] = {id, input_f32, F32[1], num_components}; + interfaces.push_back(id); + } default: break; } } void EmitContext::DefineOutputs(const IR::Program& program) { + const auto& info = program.info; switch (stage) { case Stage::Vertex: output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + for (u32 i = 0; i < IR::NumParams; i++) { + const IR::Attribute param{IR::Attribute::Param0 + i}; + if (!info.stores.GetAny(param)) { + continue; + } + const u32 num_components = info.stores.NumComponents(param); + const Id id{DefineOutput(F32[num_components], i)}; + Name(id, fmt::format("out_attr{}", i)); + output_params[i] = {id, output_f32, F32[1], num_components}; + interfaces.push_back(id); + } break; case Stage::Fragment: - frag_color[0] = DefineOutput(F32[4], 0); - Name(frag_color[0], fmt::format("frag_color{}", 0)); - interfaces.push_back(frag_color[0]); + for (u32 i = 0; i < IR::NumRenderTargets; i++) { + const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; + if (!info.stores.GetAny(mrt)) { + continue; + } + frag_color[i] = DefineOutput(F32[4], i); + Name(frag_color[i], fmt::format("frag_color{}", i)); + interfaces.push_back(frag_color[i]); + } break; default: break; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index bf78a445..26298e38 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -135,6 +135,9 @@ public: Id u32_zero_value{}; Id f32_zero_value{}; + Id input_u32{}; + Id input_f32{}; + Id input_s32{}; Id output_u32{}; Id output_f32{}; @@ -145,25 +148,22 @@ public: Id base_vertex{}; std::array frag_color{}; - struct InputParamInfo { + struct SpirvAttribute { Id id; Id pointer_type; Id component_type; + u32 num_components; }; - std::array input_params{}; - - struct ParamElementInfo { - Id id{}; - u32 first_element{}; - u32 num_components{}; - }; - std::array, 32> output_params{}; + std::array input_params{}; + std::array output_params{}; private: void DefineArithmeticTypes(); void DefineInterfaces(const IR::Program& program); void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); + + SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp new file mode 100644 index 00000000..7f4f50e9 --- /dev/null +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/frontend/decode.h" +#include "shader_recompiler/frontend/fetch_shader.h" + +namespace Shader::Gcn { + +/** + * s_load_dwordx4 s[8:11], s[2:3], 0x00 + * s_load_dwordx4 s[12:15], s[2:3], 0x04 + * s_load_dwordx4 s[16:19], s[2:3], 0x08 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen + * buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen + * buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen + * s_waitcnt 0 + * s_setpc_b64 s[0:1] + + * s_load_dwordx4 s[4:7], s[2:3], 0x0 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen + * s_load_dwordx4 s[4:7], s[2:3], 0x8 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen + * s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0) + * s_setpc_b64 s[0:1] + + * A normal fetch shader looks like the above, the instructions are generated + * using input semantics on cpu side. Load instructions can either be separate or interleaved + * We take the reverse way, extract the original input semantics from these instructions. + **/ + +std::vector ParseFetchShader(const u32* code) { + std::vector attributes; + GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); + GcnDecodeContext decoder; + + struct VsharpLoad { + u32 dword_offset{}; + s32 base_sgpr{}; + s32 dst_reg{-1}; + }; + boost::container::static_vector loads; + + u32 semantic_index = 0; + while (!code_slice.atEnd()) { + const auto inst = decoder.decodeInstruction(code_slice); + if (inst.opcode == Opcode::S_SETPC_B64) { + break; + } + + if (inst.inst_class == InstClass::ScalarMemRd) { + loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code); + continue; + } + + if (inst.inst_class == InstClass::VectorMemBufFmt) { + // SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs + const u32 base_sgpr = inst.src[2].code * 4; + + // Find the load instruction that loaded the V# to the SPGR. + // This is so we can determine its index in the vertex table. + const auto it = std::ranges::find_if( + loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; }); + + auto& attrib = attributes.emplace_back(); + attrib.semantic = semantic_index++; + attrib.dest_vgpr = inst.src[1].code; + attrib.num_elements = inst.control.mubuf.count; + attrib.sgpr_base = it->base_sgpr; + attrib.dword_offset = it->dword_offset; + + // Mark load as used. + it->dst_reg = -1; + } + } + + return attributes; +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h new file mode 100644 index 00000000..2f8eae12 --- /dev/null +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::Gcn { + +struct VertexAttribute { + u8 semantic; ///< Semantic index of the attribute + u8 dest_vgpr; ///< Destination VGPR to load first component. + u8 num_elements; ///< Number of components to load + u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# + u8 dword_offset; ///< The dword offset of the V# that describes this attribute. +}; + +std::vector ParseFetchShader(const u32* code); + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index f593529d..2064c6a5 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -600,9 +600,9 @@ public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Stage stage_) + Info& info_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; @@ -633,8 +633,7 @@ private: ensure_block(); const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; - Translate(current_block, stage, inst_list.subspan(start, size)); - fmt::print("{}\n", IR::DumpBlock(*current_block)); + Translate(current_block, inst_list.subspan(start, size), info); break; } case StatementType::SetVariable: { @@ -812,17 +811,17 @@ private: IR::AbstractSyntaxList& syntax_list; const Block dummy_flow_block{}; std::span inst_list; - Stage stage; + Info& info; }; } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, - CFG& cfg, Stage stage) { + CFG& cfg, Info& info) { ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info}; return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index fa7b6738..09814349 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -10,13 +10,13 @@ #include "shader_recompiler/object_pool.h" namespace Shader { -enum class Stage : u32; +struct Info; } namespace Shader::Gcn { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, CFG& cfg, - Stage stage); + Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 002351ca..06faf28d 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -2,14 +2,16 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" namespace Shader::Gcn { -Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} { +Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} { IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (stage) { + switch (info.stage) { case Stage::Vertex: // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23 ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); @@ -92,11 +94,39 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { } } -void Translate(IR::Block* block, Stage stage, std::span inst_list) { +void Translator::EmitFetch(const GcnInst& inst) { + // Read the pointer to the fetch shader assembly. + const u32 sgpr_base = inst.src[0].code; + const u32* code; + std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); + + // Parse the assembly to generate a list of attributes. + const auto attribs = ParseFetchShader(code); + for (const auto& attrib : attribs) { + const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; + IR::VectorReg dst_reg{attrib.dest_vgpr}; + for (u32 i = 0; i < attrib.num_elements; i++) { + ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i)); + } + + // Read the V# of the attribute to figure out component number and type. + const auto buffer = info.ReadUd(attrib.sgpr_base, attrib.dword_offset); + const u32 num_components = AmdGpu::NumComponents(buffer.data_format); + info.vs_inputs.push_back({ + .fmt = buffer.num_format, + .binding = attrib.semantic, + .num_components = std::min(attrib.num_elements, num_components), + .sgpr_base = attrib.sgpr_base, + .dword_offset = attrib.dword_offset, + }); + } +} + +void Translate(IR::Block* block, std::span inst_list, Info& info) { if (inst_list.empty()) { return; } - Translator translator{block, stage}; + Translator translator{block, info}; for (const auto& inst : inst_list) { switch (inst.opcode) { case Opcode::S_MOV_B32: @@ -115,6 +145,9 @@ void Translate(IR::Block* block, Stage stage, std::span inst_list translator.V_MUL_F32(inst); break; case Opcode::S_SWAPPC_B64: + ASSERT(info.stage == Stage::Vertex); + translator.EmitFetch(inst); + break; case Opcode::S_WAITCNT: break; // Ignore for now. case Opcode::S_BUFFER_LOAD_DWORDX16: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2f972bef..7e0186f3 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -7,9 +7,10 @@ #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/runtime_info.h" namespace Shader { -enum class Stage : u32; +struct Info; } namespace Shader::Gcn { @@ -25,7 +26,9 @@ enum class ConditionOp : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Stage stage); + explicit Translator(IR::Block* block_, Info& info); + + void EmitFetch(const GcnInst& inst); // Scalar ALU void S_MOV(const GcnInst& inst); @@ -66,8 +69,9 @@ private: private: IR::Block* block; IR::IREmitter ir; + Info& info; }; -void Translate(IR::Block* block, Stage stage, std::span inst_list); +void Translate(IR::Block* block, std::span inst_list, Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index cbb3268c..99cebdd2 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -20,9 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) { void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])), - ir.FPConvert(16, GetSrc(inst.src[1]))); - ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32)); + const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); + ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } void Translator::V_MUL_F32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 47c98cd7..7d41d430 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -7,7 +7,9 @@ namespace Shader::Gcn { void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr}; + auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + attr.semantic = inst.control.vintrp.attr; + const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 714053bc..3b60bf65 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) { return "Param31"; case Attribute::VertexId: return "VertexId"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::FragCoord: + return "FragCoord"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index a4d76dbf..687d3ad4 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,10 +72,12 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, + Max, }; -constexpr size_t EXP_NUM_POS = 4; -constexpr size_t EXP_NUM_PARAM = 32; +constexpr size_t NumAttributes = static_cast(Attribute::Max); +constexpr size_t NumRenderTargets = 8; +constexpr size_t NumParams = 32; [[nodiscard]] bool IsParam(Attribute attribute) noexcept; @@ -86,7 +88,7 @@ constexpr size_t EXP_NUM_PARAM = 32; if (result > static_cast(Attribute::Param31)) { throw LogicError("Overflow on register arithmetic"); } - if (result < static_cast(Attribute::Param0)) { + if (result < static_cast(Attribute::RenderTarget0)) { throw LogicError("Underflow on register arithmetic"); } return static_cast(result); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index aae23ef6..8bea18e0 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -174,18 +174,10 @@ void IREmitter::SetVcc(const U1& value) { Inst(Opcode::SetVcc, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute) { - return GetAttribute(attribute, 0); -} - F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttribute, attribute, Imm32(comp)); } -U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { - return GetAttributeU32(attribute, 0); -} - U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttributeU32, attribute, Imm32(comp)); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 8c8f657e..f6bc8807 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -58,11 +58,9 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); - [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp); - [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); - [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp); - void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0); + [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); + void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); diff --git a/src/shader_recompiler/ir/passes/info_collection.cpp b/src/shader_recompiler/ir/passes/info_collection.cpp new file mode 100644 index 00000000..99aedbc4 --- /dev/null +++ b/src/shader_recompiler/ir/passes/info_collection.cpp @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +void Visit(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetAttribute: + case IR::Opcode::GetAttributeU32: { + info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32()); + break; + } + case IR::Opcode::SetAttribute: { + info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); + break; + } + default: + break; + } +} + +void CollectShaderInfoPass(IR::Program& program) { + Info& info{program.info}; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/passes.h b/src/shader_recompiler/ir/passes/passes.h index 49bb09b1..915bb80e 100644 --- a/src/shader_recompiler/ir/passes/passes.h +++ b/src/shader_recompiler/ir/passes/passes.h @@ -4,6 +4,7 @@ #pragma once #include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/program.h" namespace Shader::Optimization { @@ -11,6 +12,7 @@ void SsaRewritePass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::BlockList& program); void ConstantPropagationPass(IR::BlockList& program); -void ResourceTrackingPass(IR::BlockList& program); +void ResourceTrackingPass(IR::Program& program); +void CollectShaderInfoPass(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index feb213df..39f0b808 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) { }; } -void ResourceTrackingPass(IR::BlockList& program) { - for (IR::Block* const block : program) { +void ResourceTrackingPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsResourceInstruction(inst)) { continue; } - printf("ff\n"); IR::Inst* producer = inst.Arg(0).InstRecursive(); const auto loc = TrackSharp(producer->Arg(0)); fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords, diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index f4f5197f..eff933f2 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -3,15 +3,11 @@ #pragma once -#include #include #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/basic_block.h" - -namespace Shader { -enum class Stage : u32; -} +#include "shader_recompiler/runtime_info.h" namespace Shader::IR { @@ -20,7 +16,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; std::vector ins_list; - Stage stage; + Info info; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5bc521bd..66d19620 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -30,9 +29,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -std::vector TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Stage stage, - std::span token) { +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + std::span token, const Info&& info) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); @@ -40,6 +38,11 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); Gcn::GcnDecodeContext decoder; + static int counter = 0; + std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary); + file.write((const char*)token.data(), token.size_bytes()); + file.close(); + // Decode and save instructions IR::Program program; program.ins_list.reserve(token.size()); @@ -52,21 +55,24 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. - program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage); + program.info = std::move(info); + program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); - program.stage = stage; // Run optimization passes Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::IdentityRemovalPass(program.blocks); - // Shader::Optimization::ResourceTrackingPass(program.post_order_blocks); + Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::DeadCodeEliminationPass(program.blocks); + Shader::Optimization::CollectShaderInfoPass(program); - // TODO: Pass profile from vulkan backend - const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program); - return code; + for (const auto& block : program.blocks) { + fmt::print("{}\n", IR::DumpBlock(*block)); + } + + return program; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 8cd9c7ea..da6cdfaa 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -3,7 +3,9 @@ #pragma once +#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/object_pool.h" namespace Shader { @@ -26,8 +28,8 @@ struct BinaryInfo { u32 crc32; }; -[[nodiscard]] std::vector TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Stage stage, - std::span code); +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, + std::span code, const Info&& info); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 052108b7..40c9c6b0 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -3,39 +3,16 @@ #pragma once -#include -#include -#include "shader_recompiler/ir/type.h" +#include +#include +#include "common/assert.h" +#include "common/types.h" +#include "shader_recompiler/ir/attribute.h" +#include "video_core/amdgpu/pixel_format.h" namespace Shader { -enum class AttributeType : u8 { - Float, - SignedInt, - UnsignedInt, - SignedScaled, - UnsignedScaled, - Disabled, -}; - -enum class InputTopology { - Points, - Lines, - LinesAdjacency, - Triangles, - TrianglesAdjacency, -}; - -enum class CompareFunction { - Never, - Less, - Equal, - LessThanEqual, - Greater, - NotEqual, - GreaterThanEqual, - Always, -}; +static constexpr size_t NumUserDataRegs = 16; enum class Stage : u32 { Vertex, @@ -62,78 +39,64 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 7; -enum class Interpolation { - Smooth, - Flat, - NoPerspective, -}; - -struct ConstantBufferDescriptor { - u32 index; - u32 count; - - auto operator<=>(const ConstantBufferDescriptor&) const = default; -}; - -struct TextureDescriptor { - TextureType type; - bool is_eud; - bool is_depth; - bool is_multisample; - bool is_storage; - u32 count; - u32 eud_offset_dwords; - u32 ud_index_dwords; - - auto operator<=>(const TextureDescriptor&) const = default; -}; -using TextureDescriptors = boost::container::small_vector; - struct Info { - bool uses_workgroup_id{}; - bool uses_local_invocation_id{}; - bool uses_invocation_id{}; - bool uses_invocation_info{}; - bool uses_sample_id{}; + struct VsInput { + AmdGpu::NumberFormat fmt; + u16 binding; + u16 num_components; + u8 sgpr_base; + u8 dword_offset; + }; + boost::container::static_vector vs_inputs{}; - std::array interpolation{}; - // VaryingState loads; - // VaryingState stores; - // VaryingState passthrough; + struct PsInput { + u32 param_index; + u32 semantic; + bool is_default; + bool is_flat; + u32 default_value; + }; + boost::container::static_vector ps_inputs{}; - std::array stores_frag_color{}; - bool stores_sample_mask{}; - bool stores_frag_depth{}; + struct AttributeFlags { + bool Get(IR::Attribute attrib, u32 comp = 0) const { + return flags[Index(attrib)] & (1 << comp); + } - bool uses_fp16{}; - bool uses_fp64{}; - bool uses_fp16_denorms_flush{}; - bool uses_fp16_denorms_preserve{}; - bool uses_fp32_denorms_flush{}; - bool uses_fp32_denorms_preserve{}; - bool uses_int8{}; - bool uses_int16{}; - bool uses_int64{}; - bool uses_image_1d{}; - bool uses_sampled_1d{}; - bool uses_subgroup_vote{}; - bool uses_subgroup_mask{}; - bool uses_derivatives{}; + bool GetAny(IR::Attribute attrib) const { + return flags[Index(attrib)]; + } - IR::Type used_constant_buffer_types{}; - IR::Type used_storage_buffer_types{}; - IR::Type used_indirect_cbuf_types{}; + void Set(IR::Attribute attrib, u32 comp = 0) { + flags[Index(attrib)] |= (1 << comp); + } - // std::array constant_buffer_used_sizes{}; - u32 used_clip_distances{}; + u32 NumComponents(IR::Attribute attrib) const { + const u8 mask = flags[Index(attrib)]; + ASSERT(mask != 0b1011 || mask != 0b1101); + return std::popcount(mask); + } - // boost::container::static_vector - // constant_buffer_descriptors; - // boost::container::static_vector - // storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors; - // ImageBufferDescriptors image_buffer_descriptors; - // TextureDescriptors texture_descriptors; - // ImageDescriptors image_descriptors; + static size_t Index(IR::Attribute attrib) { + return static_cast(attrib); + } + + std::array flags; + }; + AttributeFlags loads{}; + AttributeFlags stores{}; + + std::span user_data; + Stage stage; + + template + T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { + T data; + u32* base; + std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + std::memcpy(&data, base + dword_offset, sizeof(T)); + return data; + } }; } // namespace Shader diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 09c1cb66..d43f749b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->DrawIndex(); + rasterizer->Draw(true); } break; } @@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; - // rasterizer->DrawIndex(); + if (rasterizer) { + rasterizer->Draw(false); + } break; } case PM4ItOpcode::DispatchDirect: { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index f0a27bb1..83fd2494 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" +#include "video_core/amdgpu/pixel_format.h" #include #include @@ -32,13 +33,13 @@ struct Liverpool { static constexpr u32 NumColorBuffers = 8; static constexpr u32 NumViewports = 16; static constexpr u32 NumClipPlanes = 6; - static constexpr u32 NumWordsShaderUserData = 16; + static constexpr u32 NumShaderUserData = 16; static constexpr u32 UconfigRegWordOffset = 0xC000; static constexpr u32 ContextRegWordOffset = 0xA000; static constexpr u32 ShRegWordOffset = 0x2C00; static constexpr u32 NumRegs = 0xD000; - using UserData = std::array; + using UserData = std::array; struct ShaderProgram { u32 address_lo; @@ -57,6 +58,14 @@ struct Liverpool { } }; + union PsInputControl { + u32 raw; + BitField<0, 5, u32> input_offset; + BitField<5, 1, u32> use_default; + BitField<8, 2, u32> default_value; + BitField<10, 1, u32> flat_shade; + }; + enum class ShaderExportComp : u32 { None = 0, OneComp = 1, @@ -171,25 +180,6 @@ struct Liverpool { BitField<31, 1, u32> disable_color_writes_on_depth_pass; }; - union DepthSize { - u32 raw; - BitField<0, 11, u32> pitch_tile_max; - BitField<11, 11, u32> height_tile_max; - - u32 Pitch() const { - return (pitch_tile_max + 1) << 3; - } - - u32 Height() const { - return (height_tile_max + 1) << 3; - } - }; - - union DepthSlice { - u32 raw; - BitField<0, 22, u32> slice_tile_max; - }; - enum class StencilFunc : u32 { Keep = 0, Zero = 1, @@ -227,9 +217,45 @@ struct Liverpool { BitField<24, 8, u32> stencil_op_val; }; - union StencilInfo { - u32 raw; - BitField<0, 1, u32> format; + struct DepthBuffer { + enum class ZFormat : u32 { + Invald = 0, + Z16 = 1, + Z32Float = 2, + }; + + enum class StencilFormat : u32 { + Invalid = 0, + Stencil8 = 1, + }; + + union { + BitField<0, 2, ZFormat> format; + BitField<2, 2, u32> num_samples; + BitField<13, 3, u32> tile_split; + } z_info; + union { + BitField<0, 1, StencilFormat> format; + } stencil_info; + u32 z_read_base; + u32 stencil_read_base; + u32 z_write_base; + u32 stencil_write_base; + union { + BitField<0, 11, u32> pitch_tile_max; + BitField<11, 11, u32> height_tile_max; + } depth_size; + union { + BitField<0, 22, u32> tile_max; + } depth_slice; + + u32 Pitch() const { + return (depth_size.pitch_tile_max + 1) << 3; + } + + u32 Height() const { + return (depth_size.height_tile_max + 1) << 3; + } }; enum class ClipSpace : u32 { @@ -423,39 +449,6 @@ struct Liverpool { Swap8In64 = 3, }; - enum class Format : u32 { - Invalid = 0, - Color_8 = 1, - Color_16 = 2, - Color_8_8 = 3, - Color_32 = 4, - Color_16_16 = 5, - Color_10_11_11 = 6, - Color_11_11_10 = 7, - Color_10_10_10_2 = 8, - Color_2_10_10_10 = 9, - Color_8_8_8_8 = 10, - Color_32_32 = 11, - Color_16_16_16_16 = 12, - Color_32_32_32_32 = 14, - Color_5_6_5 = 16, - Color_1_5_5_5 = 17, - Color_5_5_5_1 = 18, - Color_4_4_4_4 = 19, - Color_8_24 = 20, - Color_24_8 = 21, - Color_X24_8_32_FL = 22, - }; - - enum class NumberType : u32 { - Unorm = 0, - Snorm = 1, - Uint = 4, - Sint = 5, - Srgb = 6, - Float = 7, - }; - enum class SwapMode : u32 { Standard = 0, Alternate = 1, @@ -482,9 +475,9 @@ struct Liverpool { } view; union { BitField<0, 2, EndianSwap> endian; - BitField<2, 5, Format> format; + BitField<2, 5, DataFormat> format; BitField<7, 1, u32> linear_general; - BitField<8, 2, NumberType> number_type; + BitField<8, 2, NumberFormat> number_type; BitField<11, 2, SwapMode> comp_swap; BitField<13, 1, u32> fast_clear; BitField<14, 1, u32> compression; @@ -529,6 +522,12 @@ struct Liverpool { u64 CmaskAddress() const { return u64(cmask_base_address) << 8; } + + NumberFormat NumFormat() const { + // There is a small difference between T# and CB number types, account for it. + return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb + : info.number_type; + } }; enum class PrimitiveType : u32 { @@ -563,14 +562,8 @@ struct Liverpool { u32 stencil_clear; u32 depth_clear; Scissor screen_scissor; - INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2); - StencilInfo stencil_info; - u32 z_read_base; - u32 stencil_read_base; - u32 z_write_base; - u32 stencil_write_base; - DepthSize depth_size; - DepthSlice depth_slice; + INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2); + DepthBuffer depth_buffer; INSERT_PADDING_WORDS(0xA08E - 0xA018); ColorBufferMask color_target_mask; ColorBufferMask color_shader_mask; @@ -584,9 +577,12 @@ struct Liverpool { INSERT_PADDING_WORDS(1); std::array viewports; std::array clip_user_data; - INSERT_PADDING_WORDS(0xA1B1 - 0xA187); + INSERT_PADDING_WORDS(0xA191 - 0xA187); + std::array ps_inputs; VsOutputConfig vs_output_config; - INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1); + INSERT_PADDING_WORDS(4); + BitField<0, 6, u32> num_interp; + INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1); ShaderPosFormat shader_pos_format; ShaderExportFormat z_export_format; ColorExportFormat color_export_format; @@ -616,6 +612,17 @@ struct Liverpool { VgtNumInstances num_instances; }; std::array reg_array{}; + + const ShaderProgram* ProgramForStage(u32 index) const { + switch (index) { + case 0: + return &vs_program; + case 4: + return &ps_program; + default: + return nullptr; + } + } }; Regs regs{}; @@ -656,14 +663,16 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); -static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017); +static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); +static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191); static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1); +static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6); static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5); diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index 775fb1f1..f963370d 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ b/src/video_core/amdgpu/pixel_format.cpp @@ -2,11 +2,45 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/assert.h" #include "video_core/amdgpu/pixel_format.h" namespace AmdGpu { -u32 getNumComponents(DataFormat format) { +std::string_view NameOf(NumberFormat fmt) { + switch (fmt) { + case NumberFormat::Unorm: + return "Unorm"; + case NumberFormat::Snorm: + return "Snorm"; + case NumberFormat::Uscaled: + return "Uscaled"; + case NumberFormat::Sscaled: + return "Sscaled"; + case NumberFormat::Uint: + return "Uint"; + case NumberFormat::Sint: + return "Sint"; + case NumberFormat::SnormNz: + return "SnormNz"; + case NumberFormat::Float: + return "Float"; + case NumberFormat::Srgb: + return "Srgb"; + case NumberFormat::Ubnorm: + return "Ubnorm"; + case NumberFormat::UbnromNz: + return "UbnormNz"; + case NumberFormat::Ubint: + return "Ubint"; + case NumberFormat::Ubscaled: + return "Unscaled"; + default: + UNREACHABLE(); + } +} + +u32 NumComponents(DataFormat format) { constexpr std::array numComponentsPerElement = { 0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4, diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 488b00fc..7555cdb3 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -59,6 +61,18 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; -u32 getNumComponents(DataFormat format); +[[nodiscard]] std::string_view NameOf(NumberFormat fmt); + +u32 NumComponents(DataFormat format); } // namespace AmdGpu + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); + } +}; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 8f9a76a2..906b937e 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleListWithAdjacency; case Liverpool::PrimitiveType::AdjTriangleStrip: return vk::PrimitiveTopology::eTriangleStripWithAdjacency; + case Liverpool::PrimitiveType::QuadList: + // Needs to generate index buffer on the fly. + return vk::PrimitiveTopology::eTriangleList; default: UNREACHABLE(); return vk::PrimitiveTopology::eTriangleList; @@ -110,4 +113,42 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) { } } +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR32G32B32A32Sfloat; + } + if (data_format == AmdGpu::DataFormat::Format32_32_32 && + num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR32G32B32Uint; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR8G8B8A8Unorm; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Srgb) { + return vk::Format::eR8G8B8A8Srgb; + } + UNREACHABLE(); +} + +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format) { + UNREACHABLE(); +} + +void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { + static constexpr u16 NumVerticesPerQuad = 4; + u16* out_data = reinterpret_cast(out_ptr); + for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { + *out_data++ = i; + *out_data++ = i + 1; + *out_data++ = i + 2; + *out_data++ = i + 2; + *out_data++ = i; + *out_data++ = i + 3; + } +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 97994bf8..38f021fd 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -4,6 +4,7 @@ #pragma once #include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/pixel_format.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan::LiverpoolToVK { @@ -20,4 +21,11 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); vk::CullModeFlags CullMode(Liverpool::CullMode mode); +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); + +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format); + +void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index e952263e..72ee6c9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { if (!frame) { if (!splash_img.has_value()) { - VideoCore::ImageInfo info{}; info.pixel_format = vk::Format::eR8G8B8A8Srgb; info.type = vk::ImageType::e2D; @@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr } Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { - // Request a free presentation frame. Frame* frame = GetRenderFrame(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6cbd26b9..3db09efe 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -4,22 +4,58 @@ #include #include "common/assert.h" +#include "core/memory.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { -GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_, - vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, +GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, + const PipelineKey& key_, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules) - : instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} { + : instance{instance_}, scheduler{scheduler_}, key{key_} { const vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!infos[i]) { + continue; + } + stages[i] = *infos[i]; + } + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 0U, + .pSetLayouts = nullptr, + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; + pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + const auto& vs_info = stages[0]; + for (const auto& input : vs_info.vs_inputs) { + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + attributes.push_back({ + .location = input.binding, + .binding = input.binding, + .format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format), + .offset = 0, + }); + bindings.push_back({ + .binding = input.binding, + .stride = u32(buffer.stride), + .inputRate = vk::VertexInputRate::eVertex, + }); + } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = 0U, - .pVertexBindingDescriptions = nullptr, - .vertexAttributeDescriptionCount = 0U, - .pVertexAttributeDescriptions = nullptr, + .vertexBindingDescriptionCount = static_cast(bindings.size()), + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = static_cast(attributes.size()), + .pVertexAttributeDescriptions = attributes.data(), }; const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { @@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pName = "main", }; - const vk::Format color_format = vk::Format::eB8G8R8A8Srgb; + const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); + const u32 num_color_formats = std::distance(key.color_formats.begin(), it); const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &color_format, - .depthAttachmentFormat = vk::Format::eUndefined, + .colorAttachmentCount = num_color_formats, + .pColorAttachmentFormats = key.color_formats.data(), + .depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined, .stencilAttachmentFormat = vk::Format::eUndefined, }; @@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pDepthStencilState = &depth_info, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, - .layout = pipeline_layout, + .layout = *pipeline_layout, }; auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); @@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& GraphicsPipeline::~GraphicsPipeline() = default; +void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { + std::array buffers; + std::array offsets; + + const auto& vs_info = stages[0]; + const size_t num_buffers = vs_info.vs_inputs.size(); + for (u32 i = 0; i < num_buffers; ++i) { + const auto& input = vs_info.vs_inputs[i]; + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address); + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index d8b7887b..47cc5c23 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,19 +1,31 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/types.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" +namespace Core { +class MemoryManager; +} + namespace Vulkan { +static constexpr u32 MaxVertexBufferCount = 32; static constexpr u32 MaxShaderStages = 5; class Instance; +class Scheduler; using Liverpool = AmdGpu::Liverpool; struct PipelineKey { + std::array stage_hashes; + std::array color_formats; + vk::Format depth_format; + Liverpool::DepthControl depth; Liverpool::StencilControl stencil; Liverpool::StencilRefMask stencil_ref_front; @@ -21,26 +33,41 @@ struct PipelineKey { Liverpool::PrimitiveType prim_type; Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; + + bool operator==(const PipelineKey& key) const noexcept { + return std::memcmp(this, &key, sizeof(PipelineKey)) == 0; + } }; static_assert(std::has_unique_object_representations_v); class GraphicsPipeline { public: - explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key, - vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, + explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, + const PipelineKey& key, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules); ~GraphicsPipeline(); + void BindResources(Core::MemoryManager* memory) const; + [[nodiscard]] vk::Pipeline Handle() const noexcept { return *pipeline; } private: const Instance& instance; + Scheduler& scheduler; vk::UniquePipeline pipeline; - vk::PipelineLayout pipeline_layout; - vk::PipelineCache pipeline_cache; + vk::UniquePipelineLayout pipeline_layout; + std::array stages; PipelineKey key; }; } // namespace Vulkan + +template <> +struct std::hash { + std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept { + return XXH3_64bits(&key, sizeof(key)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0cde3e6e..32dca0c5 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() { const std::string api_version = GetReadableVersion(properties.apiVersion); const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", ")); - LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name); - LOG_INFO(Render_Vulkan, "GPU_Model", model_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions); + LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name); + LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions); } void Instance::CollectToolingInfo() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 28fb51d0..6de86c4c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,9 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/scope_exit.h" +#include +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -11,60 +13,123 @@ namespace Vulkan { +Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, + const AmdGpu::Liverpool::Regs& regs) { + Shader::Info info{}; + info.user_data = user_data; + info.stage = stage; + switch (stage) { + case Shader::Stage::Fragment: { + for (u32 i = 0; i < regs.num_interp; i++) { + info.ps_inputs.push_back({ + .param_index = regs.ps_inputs[i].input_offset.Value(), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = regs.ps_inputs[i].default_value, + }); + } + break; + } + default: + break; + } + return info; +} + PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096}, + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { - const vk::PipelineLayoutCreateInfo layout_info = { - .setLayoutCount = 0U, - .pSetLayouts = nullptr, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); } -void PipelineCache::BindPipeline() { - SCOPE_EXIT { - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); - }; +const GraphicsPipeline* PipelineCache::GetPipeline() { + RefreshKey(); + const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); + if (is_new) { + it.value() = CreatePipeline(); + } + const GraphicsPipeline* pipeline = it->second.get(); + return pipeline; +} - if (pipeline) { - return; +void PipelineCache::RefreshKey() { + auto& regs = liverpool->regs; + auto& key = graphics_key; + + key.depth = regs.depth_control; + key.stencil = regs.stencil_control; + key.stencil_ref_front = regs.stencil_ref_front; + key.stencil_ref_back = regs.stencil_ref_back; + key.prim_type = regs.primitive_type; + key.polygon_mode = regs.polygon_control.PolyMode(); + + const auto& db = regs.depth_buffer; + key.depth_format = key.depth.depth_enable + ? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) + : vk::Format::eUndefined; + for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) { + const auto& cb = regs.color_buffers[i]; + key.color_formats[i] = cb.base_address + ? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) + : vk::Format::eUndefined; } - const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) { - const u32* token = pgm.Address(); + for (u32 i = 0; i < MaxShaderStages; i++) { + auto* pgm = regs.ProgramForStage(i); + if (!pgm || !pgm->Address()) { + key.stage_hashes[i] = 0; + continue; + } + const u32* code = pgm->Address(); - // Retrieve shader header. Shader::BinaryInfo bininfo; - std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo)); + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + key.stage_hashes[i] = bininfo.shader_hash; + } +} + +std::unique_ptr PipelineCache::CreatePipeline() { + const auto& regs = liverpool->regs; + + std::array programs; + std::array infos{}; + + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!graphics_key.stage_hashes[i]) { + stages[i] = VK_NULL_HANDLE; + continue; + } + auto* pgm = regs.ProgramForStage(i); + const u32* code = pgm->Address(); + + Shader::BinaryInfo bininfo; + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + const u32 num_dwords = bininfo.length / sizeof(u32); - // Lookup if the shader already exists. const auto it = module_map.find(bininfo.shader_hash); if (it != module_map.end()) { - return *it->second; + stages[i] = *it->second; + continue; } - // Compile and cache shader. - const auto data = std::span{token, bininfo.length / sizeof(u32)}; - const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data); - return CompileSPV(program, instance.GetDevice()); - }; + block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); - // Retrieve shader stage modules. - // TODO: Only do this when program address is changed. - stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex); - stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment); + // Recompile shader to IR. + const auto stage = Shader::Stage{i}; + const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); + programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords}, + std::move(info)); - // Bind pipeline. - // TODO: Read entire key based on reg state. - graphics_key.prim_type = liverpool->regs.primitive_type; - graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode(); - pipeline = std::make_unique(instance, graphics_key, *pipeline_cache, - *pipeline_layout, stages); + // Compile IR to SPIR-V + const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]); + stages[i] = CompileSPV(spv_code, instance.GetDevice()); + infos[i] = &programs[i].info; + } + + return std::make_unique(instance, scheduler, graphics_key, *pipeline_cache, + infos, stages); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7634f9cb..32830eab 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -8,6 +8,10 @@ #include "shader_recompiler/object_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +namespace Shader { +struct Info; +} + namespace Vulkan { class Instance; @@ -21,7 +25,12 @@ public: AmdGpu::Liverpool* liverpool); ~PipelineCache() = default; - void BindPipeline(); + const GraphicsPipeline* GetPipeline(); + +private: + void RefreshKey(); + + std::unique_ptr CreatePipeline(); private: const Instance& instance; @@ -31,7 +40,7 @@ private: vk::UniquePipelineLayout pipeline_layout; tsl::robin_map module_map; std::array stages{}; - std::unique_ptr pipeline; + tsl::robin_map> graphics_pipelines; PipelineKey graphics_key{}; Shader::ObjectPool inst_pool; Shader::ObjectPool block_pool; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5f5d3d4e..3d301f62 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -18,33 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, - liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool}, + liverpool{liverpool_}, memory{Core::Memory::Instance()}, + pipeline_cache{instance, scheduler, liverpool}, vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } + + memory->SetInstance(&instance); } Rasterizer::~Rasterizer() = default; -void Rasterizer::DrawIndex() { +void Rasterizer::Draw(bool is_indexed) { const auto cmdbuf = scheduler.CommandBuffer(); - auto& regs = liverpool->regs; - - static bool first_time = true; - if (first_time) { - first_time = false; - return; - } - - UpdateDynamicState(); - - pipeline_cache.BindPipeline(); - - const u32 pitch = regs.color_buffers[0].Pitch(); - const u32 height = regs.color_buffers[0].Height(); - const u32 tile_max = regs.color_buffers[0].slice.tile_max; - auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch); + const auto& regs = liverpool->regs; + const u32 num_indices = SetupIndexBuffer(is_indexed); + const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline(); + pipeline->BindResources(memory); const vk::RenderingAttachmentInfo color_info = { .imageView = *image_view.image_view, @@ -61,13 +54,50 @@ void Rasterizer::DrawIndex() { .pColorAttachments = &color_info, }; + UpdateDynamicState(); + cmdbuf.beginRendering(rendering_info); - cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); - cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); - cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + if (is_indexed) { + cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); + } else { + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + } cmdbuf.endRendering(); } +u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { + // Emulate QuadList primitive type with CPU made index buffer. + const auto& regs = liverpool->regs; + if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) { + ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw"); + is_indexed = true; + + // Emit indices. + const u32 index_size = 3 * regs.num_indices; + const auto [data, offset, _] = vertex_index_buffer.Map(index_size); + LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); + vertex_index_buffer.Commit(index_size); + + // Bind index buffer. + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16); + return index_size / sizeof(u16); + } + if (!is_indexed) { + return 0; + } + + const VAddr index_address = regs.index_base_address.Address(); + const auto [buffer, offset] = memory->GetVulkanBuffer(index_address); + const vk::IndexType index_type = + regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16 + : vk::IndexType::eUint32; + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(buffer, offset, index_type); + return regs.num_indices; +} + void Rasterizer::UpdateDynamicState() { UpdateViewportScissorState(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ba3c2d3a..a8386c25 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -3,7 +3,6 @@ #pragma once -#include #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" @@ -11,6 +10,10 @@ namespace AmdGpu { struct Liverpool; } +namespace Core { +class MemoryManager; +} + namespace VideoCore { class TextureCache; } @@ -26,17 +29,14 @@ public: VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); ~Rasterizer(); - /// Performs a draw call with an index buffer. - void DrawIndex(); - - /// Updates graphics state that is not part of the bound pipeline. - void UpdateDynamicState(); + void Draw(bool is_indexed); private: - /// Updates viewport and scissor from liverpool registers. - void UpdateViewportScissorState(); + u32 SetupIndexBuffer(bool& is_indexed); + void MapMemory(VAddr addr, size_t size); - /// Updates depth and stencil pipeline state from liverpool registers. + void UpdateDynamicState(); + void UpdateViewportScissorState(); void UpdateDepthStencilState(); private: @@ -44,6 +44,7 @@ private: Scheduler& scheduler; VideoCore::TextureCache& texture_cache; AmdGpu::Liverpool* liverpool; + Core::MemoryManager* memory; PipelineCache pipeline_cache; StreamBuffer vertex_index_buffer; }; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index d31a1f5d..637f03d0 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,7 +35,7 @@ public: * @param size Size to reserve. * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::tuple Map(u64 size, u64 alignment = 0); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Commit(u64 size); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index b78d2563..6a7bba8e 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/config.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/image.h" @@ -65,6 +66,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } } +ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { + is_tiled = true; + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); + type = vk::ImageType::e2D; + size.width = buffer.Pitch(); + size.height = buffer.Height(); + pitch = size.width; + guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1); +} + UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) : device{device_}, allocator{allocator_} {} diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c1bddec7..92391fde 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -6,6 +6,7 @@ #include "common/enum.h" #include "common/types.h" #include "core/libraries/videoout/buffer.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/types.h" @@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) struct ImageInfo { ImageInfo() = default; explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept; bool is_tiled = false; vk::Format pixel_format = vk::Format::eUndefined; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 15679ba9..e21bb6ed 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -101,8 +101,8 @@ TextureCache::~TextureCache() { } void TextureCache::OnCpuWrite(VAddr address) { - const VAddr address_aligned = address & ~((1 << PageBits) - 1); - ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) { + const VAddr address_aligned = address & ~((1 << PageShift) - 1); + ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) { // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. @@ -137,26 +137,20 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { return image; } -ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) { - boost::container::small_vector image_ids; - ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) { - if (image.cpu_addr == cpu_address) { - image_ids.push_back(image_id); - } - }); +ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) { + const ImageInfo info{buffer}; + auto& image = FindImage(info, buffer.Address()); - ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!"); - auto* image = &slot_images[image_ids.empty() ? ImageId{0} : image_ids.back()]; - - ImageViewInfo info; - info.format = vk::Format::eB8G8R8A8Srgb; - if (const ImageViewId view_id = image->FindView(info); view_id) { + ImageViewInfo view_info; + view_info.format = info.pixel_format; + if (const ImageViewId view_id = image.FindView(view_info); view_id) { return slot_image_views[view_id]; } - const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image); - image->image_view_infos.emplace_back(info); - image->image_view_ids.emplace_back(view_id); + const ImageViewId view_id = + slot_image_views.insert(instance, scheduler, view_info, image.image); + image.image_view_infos.emplace_back(view_info); + image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; } @@ -225,13 +219,13 @@ void TextureCache::UnregisterImage(ImageId image_id) { ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { - ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift); return; } auto& image_ids = page_it.value(); const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { - ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift); return; } image_ids.erase(vector_it); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a11201c4..f59f16c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -37,7 +37,7 @@ public: Image& FindImage(const ImageInfo& info, VAddr cpu_address); /// Retrieves the render target with specified properties - ImageView& RenderTarget(VAddr cpu_address, u32 pitch); + ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer); /// Reuploads image contents. void RefreshImage(Image& image);