diff --git a/.gitmodules b/.gitmodules index 76ca5dca..385021a8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,12 +50,12 @@ [submodule "externals/toml11"] path = externals/toml11 url = https://github.com/ToruNiina/toml11.git -[submodule "externals/xxHash"] - path = externals/xxHash - url = https://github.com/Cyan4973/xxHash.git [submodule "externals/zydis"] path = externals/zydis url = https://github.com/zyantific/zydis.git [submodule "externals/sirit"] path = externals/sirit url = https://github.com/raphaelthegreat/sirit +[submodule "externals/xxhash"] + path = externals/xxhash + url = https://github.com/Cyan4973/xxHash.git diff --git a/CMakeLists.txt b/CMakeLists.txt index dae1e4f2..150c1001 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -324,6 +324,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/frontend/control_flow_graph.h src/shader_recompiler/frontend/decode.cpp src/shader_recompiler/frontend/decode.h + src/shader_recompiler/frontend/fetch_shader.cpp + src/shader_recompiler/frontend/fetch_shader.h src/shader_recompiler/frontend/format.cpp src/shader_recompiler/frontend/instruction.cpp src/shader_recompiler/frontend/instruction.h @@ -333,6 +335,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/constant_propogation_pass.cpp + src/shader_recompiler/ir/passes/info_collection.cpp src/shader_recompiler/ir/passes/passes.h src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 24bca2f1..b2d348b7 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL) add_subdirectory(toml11 EXCLUDE_FROM_ALL) # xxHash -add_library(xxhash INTERFACE) -target_include_directories(xxhash INTERFACE xxhash) +add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c) +target_include_directories(xxhash PUBLIC xxhash) # Zydis option(ZYDIS_BUILD_TOOLS "" OFF) @@ -92,4 +92,4 @@ endif() add_subdirectory(sirit EXCLUDE_FROM_ALL) if (WIN32) target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") -endif() \ No newline at end of file +endif() diff --git a/externals/xxHash b/externals/xxhash similarity index 100% rename from externals/xxHash rename to externals/xxhash diff --git a/src/core/memory.cpp b/src/core/memory.cpp index aa5c6676..a16abbe0 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -7,6 +7,7 @@ #include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/memory.h" +#include "video_core/renderer_vulkan/vk_instance.h" namespace Core { @@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.prot = prot; new_vma.name = name; new_vma.type = type; + + if (type == VMAType::Direct) { + MapVulkanMemory(mapped_addr, size); + } }; // When virtual addr is zero let the address space manager pick the address. @@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, "Attempting to unmap partially mapped range"); + if (it->second.type == VMAType::Direct) { + UnmapVulkanMemory(virtual_addr, size); + } + // Mark region as free and attempt to coalesce it with neighbours. 
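// A minimal sketch (hypothetical names, not from the patch) of the containing-range lookup that
// the new MemoryManager::GetVulkanBuffer below relies on: std::map::upper_bound returns the first
// mapping whose base is greater than the queried address, so stepping back one entry yields the
// mapping that contains it, and the difference of bases is the offset into that Vulkan buffer.
#include <cstddef>
#include <cstdint>
#include <map>

struct Mapping {
    std::uintptr_t base;
    std::size_t size;
};

// Returns the mapping whose base is at or below addr, or nullptr when addr precedes every mapping.
inline const Mapping* FindContaining(const std::map<std::uintptr_t, Mapping>& mappings,
                                     std::uintptr_t addr) {
    auto it = mappings.upper_bound(addr); // first entry with base > addr
    if (it == mappings.begin()) {
        return nullptr;
    }
    --it; // now it->first <= addr; a caller would still validate addr < base + size
    return &it->second;
}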
auto& vma = it->second; vma.type = VMAType::Free; @@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { impl.Unmap(virtual_addr, size); } +std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { + auto it = mapped_memories.upper_bound(addr); + it = std::prev(it); + ASSERT(it != mapped_memories.end() && it->first <= addr); + return std::make_pair(*it->second.buffer, addr - it->first); +} + VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); @@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) { return iter; } +void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { + const vk::Device device = instance->GetDevice(); + const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties(); + void* host_pointer = reinterpret_cast(addr); + const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT( + vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer); + ASSERT(host_mem_props.memoryTypeBits != 0); + + int mapped_memory_type = -1; + auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) { + u32 host_mem_types = host_mem_props.memoryTypeBits; + while (host_mem_types != 0) { + // Try to find a cached memory type + mapped_memory_type = std::countr_zero(host_mem_types); + host_mem_types -= (1 << mapped_memory_type); + + if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) { + return; + } + } + + mapped_memory_type = -1; + }; + + // First try to find a memory that is both coherent and cached + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eHostCached); + if (mapped_memory_type == -1) + // Then only coherent (lower performance) + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent); + + if (mapped_memory_type == -1) { + LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping"); + mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits); + } + + const vk::StructureChain alloc_info = { + vk::MemoryAllocateInfo{ + .allocationSize = size, + .memoryTypeIndex = static_cast(mapped_memory_type), + }, + vk::ImportMemoryHostPointerInfoEXT{ + .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + .pHostPointer = host_pointer, + }, + }; + + const auto [it, new_memory] = mapped_memories.try_emplace(addr); + ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory"); + + auto& memory = it->second; + memory.backing = device.allocateMemoryUnique(alloc_info.get()); + + constexpr vk::BufferUsageFlags MapFlags = + vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eUniformBuffer; + + const vk::StructureChain buffer_info = { + vk::BufferCreateInfo{ + .size = size, + .usage = MapFlags, + .sharingMode = vk::SharingMode::eExclusive, + }, + vk::ExternalMemoryBufferCreateInfoKHR{ + .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + }}; + memory.buffer = device.createBufferUnique(buffer_info.get()); + device.bindBufferMemory(*memory.buffer, *memory.backing, 0); +} + +void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) { + const auto it = mapped_memories.find(addr); + ASSERT(it != mapped_memories.end() && 
it->second.buffer_size == size); + mapped_memories.erase(it); +} + } // namespace Core diff --git a/src/core/memory.h b/src/core/memory.h index a86930c8..4c0fadbf 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -10,6 +11,11 @@ #include "common/singleton.h" #include "common/types.h" #include "core/address_space.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { +class Instance; +} namespace Core { @@ -86,6 +92,10 @@ public: explicit MemoryManager(); ~MemoryManager(); + void SetInstance(const Vulkan::Instance* instance_) { + instance = instance_; + } + PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, int memory_type); @@ -97,11 +107,9 @@ public: void UnmapMemory(VAddr virtual_addr, size_t size); -private: - bool HasOverlap(VAddr addr, size_t size) const { - return vma_map.find(addr) != vma_map.end(); - } + std::pair GetVulkanBuffer(VAddr addr); +private: VMAHandle FindVMA(VAddr target) { // Return first the VMA with base >= target. const auto it = vma_map.lower_bound(target); @@ -117,10 +125,22 @@ private: VMAHandle MergeAdjacent(VMAHandle iter); + void MapVulkanMemory(VAddr addr, size_t size); + + void UnmapVulkanMemory(VAddr addr, size_t size); + private: AddressSpace impl; std::vector allocations; VMAMap vma_map; + + struct MappedMemory { + vk::UniqueBuffer buffer; + vk::UniqueDeviceMemory backing; + size_t buffer_size; + }; + std::map mapped_memories; + const Vulkan::Instance* instance{}; }; using Memory = Common::Singleton; diff --git a/src/main.cpp b/src/main.cpp index 2a7f839e..43196046 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -20,7 +20,6 @@ #include "core/libraries/libs.h" #include "core/libraries/videoout/video_out.h" #include "core/linker.h" -#include "core/tls.h" #include "input/controller.h" #include "sdl_window.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index f341d465..fb9c67d6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -171,7 +171,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.stage) { + switch (program.info.stage) { case Stage::Compute: { // const std::array workgroup_size{program.workgroup_size}; // execution_model = spv::ExecutionModel::GLCompute; @@ -194,7 +194,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { // } break; default: - throw NotImplementedException("Stage {}", u32(program.stage)); + throw NotImplementedException("Stage {}", u32(program.info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -222,7 +222,7 @@ std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Binding EmitContext ctx{profile, program, bindings}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); - if (program.stage == Stage::Vertex) { + if (program.info.stage == Stage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 0ba72f2a..2ed4e29d 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -10,12 +10,11 @@ namespace { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index).at(element)}; + const auto& info{ctx.output_params.at(index)}; if (info.num_components == 1) { return info.id; } else { - const u32 index_element{element - info.first_element}; - return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element)); + return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element)); } } switch (attr) { @@ -68,22 +67,21 @@ Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR:: throw LogicError("Unreachable instruction"); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { - const u32 element{static_cast(attr) % 4}; +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(index)}; if (!ValidId(param.id)) { // Attribute is disabled or varying component is not written - return ctx.ConstF32(element == 3 ? 1.0f : 0.0f); + return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); } - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))}; + const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; return ctx.OpLoad(param.component_type, pointer); } throw NotImplementedException("Read attribute {}", attr); } -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { switch (attr) { case IR::Attribute::VertexId: return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); @@ -93,9 +91,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { - if (attr == IR::Attribute::Param0) { - return; - } const Id pointer{OutputAttrPointer(ctx, attr, element)}; ctx.OpStore(pointer, value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 20d58e90..24685275 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -46,9 +46,9 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu const IR::Value& offset); Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 
e9a55766..771e46d4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -36,7 +36,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} { u32& uniform_binding{bindings.unified}; u32& storage_binding{bindings.unified}; u32& texture_binding{bindings.unified}; @@ -98,6 +98,10 @@ void EmitContext::DefineArithmeticTypes() { u32_zero_value = ConstU32(0U); f32_zero_value = ConstF32(0.0f); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); } @@ -107,26 +111,123 @@ void EmitContext::DefineInterfaces(const IR::Program& program) { DefineOutputs(program); } +Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { + switch (fmt) { + case AmdGpu::NumberFormat::Float: + case AmdGpu::NumberFormat::Unorm: + return ctx.F32[4]; + case AmdGpu::NumberFormat::Sint: + return ctx.S32[4]; + case AmdGpu::NumberFormat::Uint: + return ctx.U32[4]; + case AmdGpu::NumberFormat::Sscaled: + return ctx.F32[4]; + case AmdGpu::NumberFormat::Uscaled: + return ctx.F32[4]; + default: + break; + } + throw InvalidArgument("Invalid attribute type {}", fmt); +} + +EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) { + switch (fmt) { + case AmdGpu::NumberFormat::Float: + case AmdGpu::NumberFormat::Unorm: + return {id, input_f32, F32[1], 4}; + case AmdGpu::NumberFormat::Uint: + return {id, input_u32, U32[1], 4}; + case AmdGpu::NumberFormat::Sint: + return {id, input_s32, S32[1], 4}; + case AmdGpu::NumberFormat::Sscaled: + return {id, input_f32, F32[1], 4}; + case AmdGpu::NumberFormat::Uscaled: + return {id, input_f32, F32[1], 4}; + default: + break; + } + throw InvalidArgument("Invalid attribute type {}", fmt); +} + +Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { + switch (default_value) { + case 0: + return ctx.ConstF32(0.f, 0.f, 0.f, 0.f); + case 1: + return ctx.ConstF32(0.f, 0.f, 0.f, 1.f); + case 2: + return ctx.ConstF32(1.f, 1.f, 1.f, 0.f); + case 3: + return ctx.ConstF32(1.f, 1.f, 1.f, 1.f); + default: + UNREACHABLE(); + } +} + void EmitContext::DefineInputs(const IR::Program& program) { + const auto& info = program.info; switch (stage) { case Stage::Vertex: vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); + for (const auto& input : info.vs_inputs) { + const Id type{GetAttributeType(*this, input.fmt)}; + const Id id{DefineInput(type, input.binding)}; + Name(id, fmt::format("vs_in_attr{}", input.binding)); + input_params[input.binding] = GetAttributeInfo(input.fmt, id); + interfaces.push_back(id); + } break; + case Stage::Fragment: + for (const auto& input : info.ps_inputs) { + if (input.is_default) { + input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), + 
input_f32, F32[1]}; + continue; + } + const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; + const u32 num_components = info.loads.NumComponents(param); + const Id type{F32[num_components]}; + const Id id{DefineInput(type, input.semantic)}; + if (input.is_flat) { + Decorate(id, spv::Decoration::Flat); + } + Name(id, fmt::format("fs_in_attr{}", input.semantic)); + input_params[input.semantic] = {id, input_f32, F32[1], num_components}; + interfaces.push_back(id); + } default: break; } } void EmitContext::DefineOutputs(const IR::Program& program) { + const auto& info = program.info; switch (stage) { case Stage::Vertex: output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + for (u32 i = 0; i < IR::NumParams; i++) { + const IR::Attribute param{IR::Attribute::Param0 + i}; + if (!info.stores.GetAny(param)) { + continue; + } + const u32 num_components = info.stores.NumComponents(param); + const Id id{DefineOutput(F32[num_components], i)}; + Name(id, fmt::format("out_attr{}", i)); + output_params[i] = {id, output_f32, F32[1], num_components}; + interfaces.push_back(id); + } break; case Stage::Fragment: - frag_color[0] = DefineOutput(F32[4], 0); - Name(frag_color[0], fmt::format("frag_color{}", 0)); - interfaces.push_back(frag_color[0]); + for (u32 i = 0; i < IR::NumRenderTargets; i++) { + const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; + if (!info.stores.GetAny(mrt)) { + continue; + } + frag_color[i] = DefineOutput(F32[4], i); + Name(frag_color[i], fmt::format("frag_color{}", i)); + interfaces.push_back(frag_color[i]); + } break; default: break; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index bf78a445..26298e38 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -135,6 +135,9 @@ public: Id u32_zero_value{}; Id f32_zero_value{}; + Id input_u32{}; + Id input_f32{}; + Id input_s32{}; Id output_u32{}; Id output_f32{}; @@ -145,25 +148,22 @@ public: Id base_vertex{}; std::array frag_color{}; - struct InputParamInfo { + struct SpirvAttribute { Id id; Id pointer_type; Id component_type; + u32 num_components; }; - std::array input_params{}; - - struct ParamElementInfo { - Id id{}; - u32 first_element{}; - u32 num_components{}; - }; - std::array, 32> output_params{}; + std::array input_params{}; + std::array output_params{}; private: void DefineArithmeticTypes(); void DefineInterfaces(const IR::Program& program); void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); + + SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp new file mode 100644 index 00000000..7f4f50e9 --- /dev/null +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/frontend/decode.h" +#include "shader_recompiler/frontend/fetch_shader.h" + +namespace Shader::Gcn { + +/** + * s_load_dwordx4 s[8:11], s[2:3], 0x00 + * s_load_dwordx4 s[12:15], s[2:3], 0x04 + * s_load_dwordx4 s[16:19], s[2:3], 0x08 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen + * buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen + * 
buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen + * s_waitcnt 0 + * s_setpc_b64 s[0:1] + + * s_load_dwordx4 s[4:7], s[2:3], 0x0 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen + * s_load_dwordx4 s[4:7], s[2:3], 0x8 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen + * s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0) + * s_setpc_b64 s[0:1] + + * A normal fetch shader looks like the above, the instructions are generated + * using input semantics on cpu side. Load instructions can either be separate or interleaved + * We take the reverse way, extract the original input semantics from these instructions. + **/ + +std::vector ParseFetchShader(const u32* code) { + std::vector attributes; + GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); + GcnDecodeContext decoder; + + struct VsharpLoad { + u32 dword_offset{}; + s32 base_sgpr{}; + s32 dst_reg{-1}; + }; + boost::container::static_vector loads; + + u32 semantic_index = 0; + while (!code_slice.atEnd()) { + const auto inst = decoder.decodeInstruction(code_slice); + if (inst.opcode == Opcode::S_SETPC_B64) { + break; + } + + if (inst.inst_class == InstClass::ScalarMemRd) { + loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code); + continue; + } + + if (inst.inst_class == InstClass::VectorMemBufFmt) { + // SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs + const u32 base_sgpr = inst.src[2].code * 4; + + // Find the load instruction that loaded the V# to the SPGR. + // This is so we can determine its index in the vertex table. + const auto it = std::ranges::find_if( + loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; }); + + auto& attrib = attributes.emplace_back(); + attrib.semantic = semantic_index++; + attrib.dest_vgpr = inst.src[1].code; + attrib.num_elements = inst.control.mubuf.count; + attrib.sgpr_base = it->base_sgpr; + attrib.dword_offset = it->dword_offset; + + // Mark load as used. + it->dst_reg = -1; + } + } + + return attributes; +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h new file mode 100644 index 00000000..2f8eae12 --- /dev/null +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::Gcn { + +struct VertexAttribute { + u8 semantic; ///< Semantic index of the attribute + u8 dest_vgpr; ///< Destination VGPR to load first component. + u8 num_elements; ///< Number of components to load + u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# + u8 dword_offset; ///< The dword offset of the V# that describes this attribute. 
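// A minimal sketch (names invented here, scaling factors taken from the comment in
// ParseFetchShader above) of the register rescaling that makes the V# matching work: SMRD SBASE
// operands are encoded in SGPR pairs and MUBUF SRSRC operands in groups of four SGPRs, so both
// are converted to plain SGPR indices before the s_load_dwordx4 destination is matched against
// the buffer_load_format_* resource operand.
#include <cstdint>

constexpr std::uint32_t SmrdBaseToSgpr(std::uint32_t sbase_code) {
    return sbase_code * 2; // s[2:3] is encoded as 1 -> SGPR index 2
}
constexpr std::uint32_t MubufResourceToSgpr(std::uint32_t srsrc_code) {
    return srsrc_code * 4; // s[8:11] is encoded as 2 -> SGPR index 8
}
static_assert(SmrdBaseToSgpr(1) == 2 && MubufResourceToSgpr(2) == 8);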
+}; + +std::vector ParseFetchShader(const u32* code); + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index f593529d..2064c6a5 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -600,9 +600,9 @@ public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Stage stage_) + Info& info_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; @@ -633,8 +633,7 @@ private: ensure_block(); const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; - Translate(current_block, stage, inst_list.subspan(start, size)); - fmt::print("{}\n", IR::DumpBlock(*current_block)); + Translate(current_block, inst_list.subspan(start, size), info); break; } case StatementType::SetVariable: { @@ -812,17 +811,17 @@ private: IR::AbstractSyntaxList& syntax_list; const Block dummy_flow_block{}; std::span inst_list; - Stage stage; + Info& info; }; } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, - CFG& cfg, Stage stage) { + CFG& cfg, Info& info) { ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info}; return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index fa7b6738..09814349 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -10,13 +10,13 @@ #include "shader_recompiler/object_pool.h" namespace Shader { -enum class Stage : u32; +struct Info; } namespace Shader::Gcn { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, CFG& cfg, - Stage stage); + Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 002351ca..06faf28d 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -2,14 +2,16 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" namespace Shader::Gcn { -Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} { +Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} { IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (stage) { + switch (info.stage) { case Stage::Vertex: // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23 
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); @@ -92,11 +94,39 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { } } -void Translate(IR::Block* block, Stage stage, std::span inst_list) { +void Translator::EmitFetch(const GcnInst& inst) { + // Read the pointer to the fetch shader assembly. + const u32 sgpr_base = inst.src[0].code; + const u32* code; + std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); + + // Parse the assembly to generate a list of attributes. + const auto attribs = ParseFetchShader(code); + for (const auto& attrib : attribs) { + const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; + IR::VectorReg dst_reg{attrib.dest_vgpr}; + for (u32 i = 0; i < attrib.num_elements; i++) { + ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i)); + } + + // Read the V# of the attribute to figure out component number and type. + const auto buffer = info.ReadUd(attrib.sgpr_base, attrib.dword_offset); + const u32 num_components = AmdGpu::NumComponents(buffer.data_format); + info.vs_inputs.push_back({ + .fmt = buffer.num_format, + .binding = attrib.semantic, + .num_components = std::min(attrib.num_elements, num_components), + .sgpr_base = attrib.sgpr_base, + .dword_offset = attrib.dword_offset, + }); + } +} + +void Translate(IR::Block* block, std::span inst_list, Info& info) { if (inst_list.empty()) { return; } - Translator translator{block, stage}; + Translator translator{block, info}; for (const auto& inst : inst_list) { switch (inst.opcode) { case Opcode::S_MOV_B32: @@ -115,6 +145,9 @@ void Translate(IR::Block* block, Stage stage, std::span inst_list translator.V_MUL_F32(inst); break; case Opcode::S_SWAPPC_B64: + ASSERT(info.stage == Stage::Vertex); + translator.EmitFetch(inst); + break; case Opcode::S_WAITCNT: break; // Ignore for now. 
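// A small sketch of the user-data convention that EmitFetch above relies on (an assumption made
// explicit here, not spelled out in the patch): the fetch-shader address occupies two consecutive
// user-data SGPRs, so copying sizeof(pointer) bytes starting at user_data[sgpr_base] reassembles
// the 64-bit host pointer without any type punning.
#include <array>
#include <cstdint>
#include <cstring>

inline const std::uint32_t* ReadFetchShaderPointer(
    const std::array<std::uint32_t, 16>& user_data, std::uint32_t sgpr_base) {
    const std::uint32_t* code = nullptr;
    static_assert(sizeof(code) == 2 * sizeof(std::uint32_t), "assumes a 64-bit host build");
    std::memcpy(&code, &user_data[sgpr_base], sizeof(code));
    return code;
}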
case Opcode::S_BUFFER_LOAD_DWORDX16: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2f972bef..7e0186f3 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -7,9 +7,10 @@ #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/runtime_info.h" namespace Shader { -enum class Stage : u32; +struct Info; } namespace Shader::Gcn { @@ -25,7 +26,9 @@ enum class ConditionOp : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Stage stage); + explicit Translator(IR::Block* block_, Info& info); + + void EmitFetch(const GcnInst& inst); // Scalar ALU void S_MOV(const GcnInst& inst); @@ -66,8 +69,9 @@ private: private: IR::Block* block; IR::IREmitter ir; + Info& info; }; -void Translate(IR::Block* block, Stage stage, std::span inst_list); +void Translate(IR::Block* block, std::span inst_list, Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index cbb3268c..99cebdd2 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -20,9 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) { void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])), - ir.FPConvert(16, GetSrc(inst.src[1]))); - ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32)); + const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); + ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } void Translator::V_MUL_F32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 47c98cd7..7d41d430 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -7,7 +7,9 @@ namespace Shader::Gcn { void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr}; + auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + attr.semantic = inst.control.vintrp.attr; + const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 714053bc..3b60bf65 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) { return "Param31"; case Attribute::VertexId: return "VertexId"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::FragCoord: + return "FragCoord"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index a4d76dbf..687d3ad4 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,10 +72,12 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, + 
Max, }; -constexpr size_t EXP_NUM_POS = 4; -constexpr size_t EXP_NUM_PARAM = 32; +constexpr size_t NumAttributes = static_cast(Attribute::Max); +constexpr size_t NumRenderTargets = 8; +constexpr size_t NumParams = 32; [[nodiscard]] bool IsParam(Attribute attribute) noexcept; @@ -86,7 +88,7 @@ constexpr size_t EXP_NUM_PARAM = 32; if (result > static_cast(Attribute::Param31)) { throw LogicError("Overflow on register arithmetic"); } - if (result < static_cast(Attribute::Param0)) { + if (result < static_cast(Attribute::RenderTarget0)) { throw LogicError("Underflow on register arithmetic"); } return static_cast(result); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index aae23ef6..8bea18e0 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -174,18 +174,10 @@ void IREmitter::SetVcc(const U1& value) { Inst(Opcode::SetVcc, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute) { - return GetAttribute(attribute, 0); -} - F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttribute, attribute, Imm32(comp)); } -U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { - return GetAttributeU32(attribute, 0); -} - U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttributeU32, attribute, Imm32(comp)); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 8c8f657e..f6bc8807 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -58,11 +58,9 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); - [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp); - [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); - [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp); - void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0); + [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); + void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); diff --git a/src/shader_recompiler/ir/passes/info_collection.cpp b/src/shader_recompiler/ir/passes/info_collection.cpp new file mode 100644 index 00000000..99aedbc4 --- /dev/null +++ b/src/shader_recompiler/ir/passes/info_collection.cpp @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +void Visit(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetAttribute: + case IR::Opcode::GetAttributeU32: { + info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32()); + break; + } + case IR::Opcode::SetAttribute: { + info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); + break; + } + default: + break; + } +} + +void CollectShaderInfoPass(IR::Program& program) { + Info& info{program.info}; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/passes.h b/src/shader_recompiler/ir/passes/passes.h 
index 49bb09b1..915bb80e 100644 --- a/src/shader_recompiler/ir/passes/passes.h +++ b/src/shader_recompiler/ir/passes/passes.h @@ -4,6 +4,7 @@ #pragma once #include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/program.h" namespace Shader::Optimization { @@ -11,6 +12,7 @@ void SsaRewritePass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::BlockList& program); void ConstantPropagationPass(IR::BlockList& program); -void ResourceTrackingPass(IR::BlockList& program); +void ResourceTrackingPass(IR::Program& program); +void CollectShaderInfoPass(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index feb213df..39f0b808 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) { }; } -void ResourceTrackingPass(IR::BlockList& program) { - for (IR::Block* const block : program) { +void ResourceTrackingPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsResourceInstruction(inst)) { continue; } - printf("ff\n"); IR::Inst* producer = inst.Arg(0).InstRecursive(); const auto loc = TrackSharp(producer->Arg(0)); fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords, diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index f4f5197f..eff933f2 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -3,15 +3,11 @@ #pragma once -#include #include #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/basic_block.h" - -namespace Shader { -enum class Stage : u32; -} +#include "shader_recompiler/runtime_info.h" namespace Shader::IR { @@ -20,7 +16,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; std::vector ins_list; - Stage stage; + Info info; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5bc521bd..66d19620 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -30,9 +29,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -std::vector TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Stage stage, - std::span token) { +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + std::span token, const Info&& info) { // Ensure first instruction is expected. 
constexpr u32 token_mov_vcchi = 0xBEEB03FF; ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); @@ -40,6 +38,11 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); Gcn::GcnDecodeContext decoder; + static int counter = 0; + std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary); + file.write((const char*)token.data(), token.size_bytes()); + file.close(); + // Decode and save instructions IR::Program program; program.ins_list.reserve(token.size()); @@ -52,21 +55,24 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. - program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage); + program.info = std::move(info); + program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); - program.stage = stage; // Run optimization passes Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::IdentityRemovalPass(program.blocks); - // Shader::Optimization::ResourceTrackingPass(program.post_order_blocks); + Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::DeadCodeEliminationPass(program.blocks); + Shader::Optimization::CollectShaderInfoPass(program); - // TODO: Pass profile from vulkan backend - const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program); - return code; + for (const auto& block : program.blocks) { + fmt::print("{}\n", IR::DumpBlock(*block)); + } + + return program; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 8cd9c7ea..da6cdfaa 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -3,7 +3,9 @@ #pragma once +#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/object_pool.h" namespace Shader { @@ -26,8 +28,8 @@ struct BinaryInfo { u32 crc32; }; -[[nodiscard]] std::vector TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Stage stage, - std::span code); +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, + std::span code, const Info&& info); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 052108b7..40c9c6b0 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -3,39 +3,16 @@ #pragma once -#include -#include -#include "shader_recompiler/ir/type.h" +#include +#include +#include "common/assert.h" +#include "common/types.h" +#include "shader_recompiler/ir/attribute.h" +#include "video_core/amdgpu/pixel_format.h" namespace Shader { -enum class AttributeType : u8 { - Float, - SignedInt, - UnsignedInt, - SignedScaled, - UnsignedScaled, - Disabled, -}; - -enum class InputTopology { - Points, - Lines, - LinesAdjacency, - Triangles, - TrianglesAdjacency, -}; - -enum class CompareFunction { - Never, - Less, - Equal, - LessThanEqual, - Greater, - NotEqual, - GreaterThanEqual, - Always, -}; +static constexpr size_t NumUserDataRegs = 16; enum class Stage : u32 { Vertex, @@ -62,78 +39,64 @@ enum class TextureType : 
u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 7; -enum class Interpolation { - Smooth, - Flat, - NoPerspective, -}; - -struct ConstantBufferDescriptor { - u32 index; - u32 count; - - auto operator<=>(const ConstantBufferDescriptor&) const = default; -}; - -struct TextureDescriptor { - TextureType type; - bool is_eud; - bool is_depth; - bool is_multisample; - bool is_storage; - u32 count; - u32 eud_offset_dwords; - u32 ud_index_dwords; - - auto operator<=>(const TextureDescriptor&) const = default; -}; -using TextureDescriptors = boost::container::small_vector; - struct Info { - bool uses_workgroup_id{}; - bool uses_local_invocation_id{}; - bool uses_invocation_id{}; - bool uses_invocation_info{}; - bool uses_sample_id{}; + struct VsInput { + AmdGpu::NumberFormat fmt; + u16 binding; + u16 num_components; + u8 sgpr_base; + u8 dword_offset; + }; + boost::container::static_vector vs_inputs{}; - std::array interpolation{}; - // VaryingState loads; - // VaryingState stores; - // VaryingState passthrough; + struct PsInput { + u32 param_index; + u32 semantic; + bool is_default; + bool is_flat; + u32 default_value; + }; + boost::container::static_vector ps_inputs{}; - std::array stores_frag_color{}; - bool stores_sample_mask{}; - bool stores_frag_depth{}; + struct AttributeFlags { + bool Get(IR::Attribute attrib, u32 comp = 0) const { + return flags[Index(attrib)] & (1 << comp); + } - bool uses_fp16{}; - bool uses_fp64{}; - bool uses_fp16_denorms_flush{}; - bool uses_fp16_denorms_preserve{}; - bool uses_fp32_denorms_flush{}; - bool uses_fp32_denorms_preserve{}; - bool uses_int8{}; - bool uses_int16{}; - bool uses_int64{}; - bool uses_image_1d{}; - bool uses_sampled_1d{}; - bool uses_subgroup_vote{}; - bool uses_subgroup_mask{}; - bool uses_derivatives{}; + bool GetAny(IR::Attribute attrib) const { + return flags[Index(attrib)]; + } - IR::Type used_constant_buffer_types{}; - IR::Type used_storage_buffer_types{}; - IR::Type used_indirect_cbuf_types{}; + void Set(IR::Attribute attrib, u32 comp = 0) { + flags[Index(attrib)] |= (1 << comp); + } - // std::array constant_buffer_used_sizes{}; - u32 used_clip_distances{}; + u32 NumComponents(IR::Attribute attrib) const { + const u8 mask = flags[Index(attrib)]; + ASSERT(mask != 0b1011 || mask != 0b1101); + return std::popcount(mask); + } - // boost::container::static_vector - // constant_buffer_descriptors; - // boost::container::static_vector - // storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors; - // ImageBufferDescriptors image_buffer_descriptors; - // TextureDescriptors texture_descriptors; - // ImageDescriptors image_descriptors; + static size_t Index(IR::Attribute attrib) { + return static_cast(attrib); + } + + std::array flags; + }; + AttributeFlags loads{}; + AttributeFlags stores{}; + + std::span user_data; + Stage stage; + + template + T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { + T data; + u32* base; + std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + std::memcpy(&data, base + dword_offset, sizeof(T)); + return data; + } }; } // namespace Shader diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 09c1cb66..d43f749b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->DrawIndex(); + rasterizer->Draw(true); 
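// A self-contained illustration (hypothetical names) of the AttributeFlags bookkeeping added
// above: each attribute gets a small per-component bitmask, Set() records a read or written
// component, and NumComponents() counts the set bits so the SPIR-V backend can size the matching
// input or output variable.
#include <array>
#include <bit>
#include <cstddef>
#include <cstdint>

struct ComponentMask {
    std::array<std::uint8_t, 8> flags{}; // 8 attributes are enough for the illustration

    void Set(std::size_t attrib, std::uint32_t comp) {
        flags[attrib] |= std::uint8_t(1u << comp);
    }
    bool GetAny(std::size_t attrib) const {
        return flags[attrib] != 0;
    }
    std::uint32_t NumComponents(std::size_t attrib) const {
        return std::uint32_t(std::popcount(std::uint32_t(flags[attrib])));
    }
};
// Usage: recording writes to .x and .y of attribute 2 yields mask 0b0011, so NumComponents(2)
// returns 2 and a two-component vector is declared for that attribute.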
} break; } @@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; - // rasterizer->DrawIndex(); + if (rasterizer) { + rasterizer->Draw(false); + } break; } case PM4ItOpcode::DispatchDirect: { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index f0a27bb1..83fd2494 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" +#include "video_core/amdgpu/pixel_format.h" #include #include @@ -32,13 +33,13 @@ struct Liverpool { static constexpr u32 NumColorBuffers = 8; static constexpr u32 NumViewports = 16; static constexpr u32 NumClipPlanes = 6; - static constexpr u32 NumWordsShaderUserData = 16; + static constexpr u32 NumShaderUserData = 16; static constexpr u32 UconfigRegWordOffset = 0xC000; static constexpr u32 ContextRegWordOffset = 0xA000; static constexpr u32 ShRegWordOffset = 0x2C00; static constexpr u32 NumRegs = 0xD000; - using UserData = std::array; + using UserData = std::array; struct ShaderProgram { u32 address_lo; @@ -57,6 +58,14 @@ struct Liverpool { } }; + union PsInputControl { + u32 raw; + BitField<0, 5, u32> input_offset; + BitField<5, 1, u32> use_default; + BitField<8, 2, u32> default_value; + BitField<10, 1, u32> flat_shade; + }; + enum class ShaderExportComp : u32 { None = 0, OneComp = 1, @@ -171,25 +180,6 @@ struct Liverpool { BitField<31, 1, u32> disable_color_writes_on_depth_pass; }; - union DepthSize { - u32 raw; - BitField<0, 11, u32> pitch_tile_max; - BitField<11, 11, u32> height_tile_max; - - u32 Pitch() const { - return (pitch_tile_max + 1) << 3; - } - - u32 Height() const { - return (height_tile_max + 1) << 3; - } - }; - - union DepthSlice { - u32 raw; - BitField<0, 22, u32> slice_tile_max; - }; - enum class StencilFunc : u32 { Keep = 0, Zero = 1, @@ -227,9 +217,45 @@ struct Liverpool { BitField<24, 8, u32> stencil_op_val; }; - union StencilInfo { - u32 raw; - BitField<0, 1, u32> format; + struct DepthBuffer { + enum class ZFormat : u32 { + Invald = 0, + Z16 = 1, + Z32Float = 2, + }; + + enum class StencilFormat : u32 { + Invalid = 0, + Stencil8 = 1, + }; + + union { + BitField<0, 2, ZFormat> format; + BitField<2, 2, u32> num_samples; + BitField<13, 3, u32> tile_split; + } z_info; + union { + BitField<0, 1, StencilFormat> format; + } stencil_info; + u32 z_read_base; + u32 stencil_read_base; + u32 z_write_base; + u32 stencil_write_base; + union { + BitField<0, 11, u32> pitch_tile_max; + BitField<11, 11, u32> height_tile_max; + } depth_size; + union { + BitField<0, 22, u32> tile_max; + } depth_slice; + + u32 Pitch() const { + return (depth_size.pitch_tile_max + 1) << 3; + } + + u32 Height() const { + return (depth_size.height_tile_max + 1) << 3; + } }; enum class ClipSpace : u32 { @@ -423,39 +449,6 @@ struct Liverpool { Swap8In64 = 3, }; - enum class Format : u32 { - Invalid = 0, - Color_8 = 1, - Color_16 = 2, - Color_8_8 = 3, - Color_32 = 4, - Color_16_16 = 5, - Color_10_11_11 = 6, - Color_11_11_10 = 7, - Color_10_10_10_2 = 8, - Color_2_10_10_10 = 9, - Color_8_8_8_8 = 10, - Color_32_32 = 11, - Color_16_16_16_16 = 12, - Color_32_32_32_32 = 14, - Color_5_6_5 = 16, - Color_1_5_5_5 = 17, - Color_5_5_5_1 = 18, - Color_4_4_4_4 = 19, - Color_8_24 = 20, - Color_24_8 = 21, - Color_X24_8_32_FL = 22, - }; - - enum class NumberType : u32 { 
- Unorm = 0, - Snorm = 1, - Uint = 4, - Sint = 5, - Srgb = 6, - Float = 7, - }; - enum class SwapMode : u32 { Standard = 0, Alternate = 1, @@ -482,9 +475,9 @@ struct Liverpool { } view; union { BitField<0, 2, EndianSwap> endian; - BitField<2, 5, Format> format; + BitField<2, 5, DataFormat> format; BitField<7, 1, u32> linear_general; - BitField<8, 2, NumberType> number_type; + BitField<8, 2, NumberFormat> number_type; BitField<11, 2, SwapMode> comp_swap; BitField<13, 1, u32> fast_clear; BitField<14, 1, u32> compression; @@ -529,6 +522,12 @@ struct Liverpool { u64 CmaskAddress() const { return u64(cmask_base_address) << 8; } + + NumberFormat NumFormat() const { + // There is a small difference between T# and CB number types, account for it. + return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb + : info.number_type; + } }; enum class PrimitiveType : u32 { @@ -563,14 +562,8 @@ struct Liverpool { u32 stencil_clear; u32 depth_clear; Scissor screen_scissor; - INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2); - StencilInfo stencil_info; - u32 z_read_base; - u32 stencil_read_base; - u32 z_write_base; - u32 stencil_write_base; - DepthSize depth_size; - DepthSlice depth_slice; + INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2); + DepthBuffer depth_buffer; INSERT_PADDING_WORDS(0xA08E - 0xA018); ColorBufferMask color_target_mask; ColorBufferMask color_shader_mask; @@ -584,9 +577,12 @@ struct Liverpool { INSERT_PADDING_WORDS(1); std::array viewports; std::array clip_user_data; - INSERT_PADDING_WORDS(0xA1B1 - 0xA187); + INSERT_PADDING_WORDS(0xA191 - 0xA187); + std::array ps_inputs; VsOutputConfig vs_output_config; - INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1); + INSERT_PADDING_WORDS(4); + BitField<0, 6, u32> num_interp; + INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1); ShaderPosFormat shader_pos_format; ShaderExportFormat z_export_format; ColorExportFormat color_export_format; @@ -616,6 +612,17 @@ struct Liverpool { VgtNumInstances num_instances; }; std::array reg_array{}; + + const ShaderProgram* ProgramForStage(u32 index) const { + switch (index) { + case 0: + return &vs_program; + case 4: + return &ps_program; + default: + return nullptr; + } + } }; Regs regs{}; @@ -656,14 +663,16 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); -static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017); +static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); +static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191); static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1); +static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6); static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5); diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index 775fb1f1..f963370d 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ b/src/video_core/amdgpu/pixel_format.cpp @@ -2,11 +2,45 @@ 
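// A minimal sketch of the arithmetic behind the GFX6_3D_REG_INDEX static_asserts above, using
// only offsets that the patch itself asserts: ps_inputs sits at context register 0xA191 and
// spans 32 words, so vs_output_config lands at 0xA1B1, and one word for it plus the four padding
// words place num_interp at 0xA1B6.
static_assert(0xA191 + 32 == 0xA1B1, "32 PsInputControl words precede vs_output_config");
static_assert(0xA1B1 + 1 + 4 == 0xA1B6, "vs_output_config plus 4 padding words precede num_interp");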
// SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/assert.h" #include "video_core/amdgpu/pixel_format.h" namespace AmdGpu { -u32 getNumComponents(DataFormat format) { +std::string_view NameOf(NumberFormat fmt) { + switch (fmt) { + case NumberFormat::Unorm: + return "Unorm"; + case NumberFormat::Snorm: + return "Snorm"; + case NumberFormat::Uscaled: + return "Uscaled"; + case NumberFormat::Sscaled: + return "Sscaled"; + case NumberFormat::Uint: + return "Uint"; + case NumberFormat::Sint: + return "Sint"; + case NumberFormat::SnormNz: + return "SnormNz"; + case NumberFormat::Float: + return "Float"; + case NumberFormat::Srgb: + return "Srgb"; + case NumberFormat::Ubnorm: + return "Ubnorm"; + case NumberFormat::UbnromNz: + return "UbnormNz"; + case NumberFormat::Ubint: + return "Ubint"; + case NumberFormat::Ubscaled: + return "Unscaled"; + default: + UNREACHABLE(); + } +} + +u32 NumComponents(DataFormat format) { constexpr std::array numComponentsPerElement = { 0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4, diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 488b00fc..7555cdb3 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -59,6 +61,18 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; -u32 getNumComponents(DataFormat format); +[[nodiscard]] std::string_view NameOf(NumberFormat fmt); + +u32 NumComponents(DataFormat format); } // namespace AmdGpu + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); + } +}; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 8f9a76a2..906b937e 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleListWithAdjacency; case Liverpool::PrimitiveType::AdjTriangleStrip: return vk::PrimitiveTopology::eTriangleStripWithAdjacency; + case Liverpool::PrimitiveType::QuadList: + // Needs to generate index buffer on the fly. 
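// A worked example (editorial, values follow directly from the EmitQuadToTriangleListIndices
// helper added below in liverpool_to_vk.cpp) of the on-the-fly index buffer this case needs:
// two quads, i.e. num_vertices == 8, expand to the 12 u16 indices below, forming triangles
// (0,1,2)+(2,0,3) for the first quad and (4,5,6)+(6,4,7) for the second.
#include <cstdint>

constexpr std::uint16_t kTwoQuadIndices[12] = {0, 1, 2, 2, 0, 3, 4, 5, 6, 6, 4, 7};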
+ return vk::PrimitiveTopology::eTriangleList; default: UNREACHABLE(); return vk::PrimitiveTopology::eTriangleList; @@ -110,4 +113,42 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) { } } +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR32G32B32A32Sfloat; + } + if (data_format == AmdGpu::DataFormat::Format32_32_32 && + num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR32G32B32Uint; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR8G8B8A8Unorm; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Srgb) { + return vk::Format::eR8G8B8A8Srgb; + } + UNREACHABLE(); +} + +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format) { + UNREACHABLE(); +} + +void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { + static constexpr u16 NumVerticesPerQuad = 4; + u16* out_data = reinterpret_cast(out_ptr); + for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { + *out_data++ = i; + *out_data++ = i + 1; + *out_data++ = i + 2; + *out_data++ = i + 2; + *out_data++ = i; + *out_data++ = i + 3; + } +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 97994bf8..38f021fd 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -4,6 +4,7 @@ #pragma once #include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/pixel_format.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan::LiverpoolToVK { @@ -20,4 +21,11 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); vk::CullModeFlags CullMode(Liverpool::CullMode mode); +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); + +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format); + +void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index e952263e..72ee6c9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { if (!frame) { if (!splash_img.has_value()) { - VideoCore::ImageInfo info{}; info.pixel_format = vk::Format::eR8G8B8A8Srgb; info.type = vk::ImageType::e2D; @@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr } Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { - // Request a free presentation frame. 
Frame* frame = GetRenderFrame(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6cbd26b9..3db09efe 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -4,22 +4,58 @@ #include #include "common/assert.h" +#include "core/memory.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { -GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_, - vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, +GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, + const PipelineKey& key_, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules) - : instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} { + : instance{instance_}, scheduler{scheduler_}, key{key_} { const vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!infos[i]) { + continue; + } + stages[i] = *infos[i]; + } + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 0U, + .pSetLayouts = nullptr, + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; + pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + const auto& vs_info = stages[0]; + for (const auto& input : vs_info.vs_inputs) { + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + attributes.push_back({ + .location = input.binding, + .binding = input.binding, + .format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format), + .offset = 0, + }); + bindings.push_back({ + .binding = input.binding, + .stride = u32(buffer.stride), + .inputRate = vk::VertexInputRate::eVertex, + }); + } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = 0U, - .pVertexBindingDescriptions = nullptr, - .vertexAttributeDescriptionCount = 0U, - .pVertexAttributeDescriptions = nullptr, + .vertexBindingDescriptionCount = static_cast(bindings.size()), + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = static_cast(attributes.size()), + .pVertexAttributeDescriptions = attributes.data(), }; const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { @@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pName = "main", }; - const vk::Format color_format = vk::Format::eB8G8R8A8Srgb; + const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); + const u32 num_color_formats = std::distance(key.color_formats.begin(), it); const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &color_format, - .depthAttachmentFormat = vk::Format::eUndefined, + .colorAttachmentCount = num_color_formats, + .pColorAttachmentFormats = key.color_formats.data(), + .depthAttachmentFormat = key.depth.depth_enable ? 
key.depth_format : vk::Format::eUndefined, .stencilAttachmentFormat = vk::Format::eUndefined, }; @@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pDepthStencilState = &depth_info, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, - .layout = pipeline_layout, + .layout = *pipeline_layout, }; auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); @@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& GraphicsPipeline::~GraphicsPipeline() = default; +void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { + std::array<vk::Buffer, MaxVertexBufferCount> buffers; + std::array<vk::DeviceSize, MaxVertexBufferCount> offsets; + + const auto& vs_info = stages[0]; + const size_t num_buffers = vs_info.vs_inputs.size(); + for (u32 i = 0; i < num_buffers; ++i) { + const auto& input = vs_info.vs_inputs[i]; + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address); + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index d8b7887b..47cc5c23 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,19 +1,31 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/types.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" +namespace Core { +class MemoryManager; +} + namespace Vulkan { +static constexpr u32 MaxVertexBufferCount = 32; static constexpr u32 MaxShaderStages = 5; class Instance; +class Scheduler; using Liverpool = AmdGpu::Liverpool; struct PipelineKey { + std::array stage_hashes; + std::array color_formats; + vk::Format depth_format; + Liverpool::DepthControl depth; Liverpool::StencilControl stencil; Liverpool::StencilRefMask stencil_ref_front; @@ -21,26 +33,41 @@ struct PipelineKey { Liverpool::PrimitiveType prim_type; Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; + + bool operator==(const PipelineKey& key) const noexcept { + return std::memcmp(this, &key, sizeof(PipelineKey)) == 0; + } }; static_assert(std::has_unique_object_representations_v<PipelineKey>); class GraphicsPipeline { public: - explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key, - vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, + explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, + const PipelineKey& key, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules); ~GraphicsPipeline(); + void BindResources(Core::MemoryManager* memory) const; + [[nodiscard]] vk::Pipeline Handle() const noexcept { return *pipeline; } private: const Instance& instance; + Scheduler& scheduler; vk::UniquePipeline pipeline; - vk::PipelineLayout pipeline_layout; - vk::PipelineCache pipeline_cache; + vk::UniquePipelineLayout pipeline_layout; + std::array stages; PipelineKey key; }; } // namespace Vulkan + +template <> +struct std::hash<Vulkan::PipelineKey> { + std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept { + return XXH3_64bits(&key, sizeof(key)); + } +}; diff --git
a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0cde3e6e..32dca0c5 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() { const std::string api_version = GetReadableVersion(properties.apiVersion); const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", ")); - LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name); - LOG_INFO(Render_Vulkan, "GPU_Model", model_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions); + LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name); + LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions); } void Instance::CollectToolingInfo() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 28fb51d0..6de86c4c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,9 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/scope_exit.h" +#include +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -11,60 +13,123 @@ namespace Vulkan { +Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, + const AmdGpu::Liverpool::Regs& regs) { + Shader::Info info{}; + info.user_data = user_data; + info.stage = stage; + switch (stage) { + case Shader::Stage::Fragment: { + for (u32 i = 0; i < regs.num_interp; i++) { + info.ps_inputs.push_back({ + .param_index = regs.ps_inputs[i].input_offset.Value(), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = regs.ps_inputs[i].default_value, + }); + } + break; + } + default: + break; + } + return info; +} + PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096}, + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { - const vk::PipelineLayoutCreateInfo layout_info = { - .setLayoutCount = 0U, - .pSetLayouts = nullptr, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); } -void PipelineCache::BindPipeline() { - SCOPE_EXIT { - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); - }; +const GraphicsPipeline* PipelineCache::GetPipeline() { + RefreshKey(); + const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); + if (is_new) { + it.value() = CreatePipeline(); + } + const 
GraphicsPipeline* pipeline = it->second.get(); + return pipeline; +} - if (pipeline) { - return; +void PipelineCache::RefreshKey() { + auto& regs = liverpool->regs; + auto& key = graphics_key; + + key.depth = regs.depth_control; + key.stencil = regs.stencil_control; + key.stencil_ref_front = regs.stencil_ref_front; + key.stencil_ref_back = regs.stencil_ref_back; + key.prim_type = regs.primitive_type; + key.polygon_mode = regs.polygon_control.PolyMode(); + + const auto& db = regs.depth_buffer; + key.depth_format = key.depth.depth_enable + ? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) + : vk::Format::eUndefined; + for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) { + const auto& cb = regs.color_buffers[i]; + key.color_formats[i] = cb.base_address + ? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) + : vk::Format::eUndefined; } - const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) { - const u32* token = pgm.Address(); + for (u32 i = 0; i < MaxShaderStages; i++) { + auto* pgm = regs.ProgramForStage(i); + if (!pgm || !pgm->Address()) { + key.stage_hashes[i] = 0; + continue; + } + const u32* code = pgm->Address(); - // Retrieve shader header. Shader::BinaryInfo bininfo; - std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo)); + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + key.stage_hashes[i] = bininfo.shader_hash; + } +} + +std::unique_ptr PipelineCache::CreatePipeline() { + const auto& regs = liverpool->regs; + + std::array programs; + std::array infos{}; + + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!graphics_key.stage_hashes[i]) { + stages[i] = VK_NULL_HANDLE; + continue; + } + auto* pgm = regs.ProgramForStage(i); + const u32* code = pgm->Address(); + + Shader::BinaryInfo bininfo; + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + const u32 num_dwords = bininfo.length / sizeof(u32); - // Lookup if the shader already exists. const auto it = module_map.find(bininfo.shader_hash); if (it != module_map.end()) { - return *it->second; + stages[i] = *it->second; + continue; } - // Compile and cache shader. - const auto data = std::span{token, bininfo.length / sizeof(u32)}; - const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data); - return CompileSPV(program, instance.GetDevice()); - }; + block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); - // Retrieve shader stage modules. - // TODO: Only do this when program address is changed. - stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex); - stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment); + // Recompile shader to IR. + const auto stage = Shader::Stage{i}; + const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); + programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords}, + std::move(info)); - // Bind pipeline. - // TODO: Read entire key based on reg state. 
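The pipeline cache above keys pipelines on the raw bytes of PipelineKey: operator== is a std::memcmp and std::hash runs XXH3_64bits over the whole struct, which is only sound because has_unique_object_representations guarantees there are no padding bytes and the cached graphics_key is value-initialized before RefreshKey fills it. A reduced, self-contained sketch of that pattern is given below; MiniKey and MiniKeyHash are illustrative stand-ins rather than types from this change, and only the xxhash API the repository already links is assumed.

#include <cstddef>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include <xxhash.h>

// Reduced stand-in for PipelineKey: fixed-width members, no padding.
struct MiniKey {
    unsigned int prim_type;
    unsigned int polygon_mode;
    unsigned int cull_mode;

    bool operator==(const MiniKey& other) const noexcept {
        return std::memcmp(this, &other, sizeof(MiniKey)) == 0;
    }
};
// Hashing/comparing object bytes is only valid when every byte is meaningful.
static_assert(std::has_unique_object_representations_v<MiniKey>);

struct MiniKeyHash {
    std::size_t operator()(const MiniKey& key) const noexcept {
        return XXH3_64bits(&key, sizeof(key));
    }
};

int main() {
    std::unordered_map<MiniKey, int, MiniKeyHash> cache;
    const MiniKey key{.prim_type = 4, .polygon_mode = 0, .cull_mode = 1};
    // Same role as graphics_pipelines.try_emplace(graphics_key) above.
    cache.try_emplace(key, 42);
    return cache.at(MiniKey{4, 0, 1}) == 42 ? 0 : 1;
}

The same reasoning applies to the real key: any member added to PipelineKey must keep the struct free of padding (or the static_assert fires), otherwise the memcmp/XXH3 pair would compare uninitialized bytes and logically equal keys could miss the cache.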
- graphics_key.prim_type = liverpool->regs.primitive_type; - graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode(); - pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache, - *pipeline_layout, stages); + // Compile IR to SPIR-V + const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]); + stages[i] = CompileSPV(spv_code, instance.GetDevice()); + infos[i] = &programs[i].info; + } + + return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache, + infos, stages); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7634f9cb..32830eab 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -8,6 +8,10 @@ #include "shader_recompiler/object_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +namespace Shader { +struct Info; +} + namespace Vulkan { class Instance; @@ -21,7 +25,12 @@ public: AmdGpu::Liverpool* liverpool); ~PipelineCache() = default; - void BindPipeline(); + const GraphicsPipeline* GetPipeline(); + +private: + void RefreshKey(); + + std::unique_ptr<GraphicsPipeline> CreatePipeline(); private: const Instance& instance; @@ -31,7 +40,7 @@ private: vk::UniquePipelineLayout pipeline_layout; tsl::robin_map module_map; std::array stages{}; - std::unique_ptr<GraphicsPipeline> pipeline; + tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines; PipelineKey graphics_key{}; Shader::ObjectPool inst_pool; Shader::ObjectPool block_pool; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5f5d3d4e..3d301f62 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -18,33 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, - liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool}, + liverpool{liverpool_}, memory{Core::Memory::Instance()}, + pipeline_cache{instance, scheduler, liverpool}, vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } + + memory->SetInstance(&instance); } Rasterizer::~Rasterizer() = default; -void Rasterizer::DrawIndex() { +void Rasterizer::Draw(bool is_indexed) { const auto cmdbuf = scheduler.CommandBuffer(); - auto& regs = liverpool->regs; - - static bool first_time = true; - if (first_time) { - first_time = false; - return; - } - - UpdateDynamicState(); - - pipeline_cache.BindPipeline(); - - const u32 pitch = regs.color_buffers[0].Pitch(); - const u32 height = regs.color_buffers[0].Height(); - const u32 tile_max = regs.color_buffers[0].slice.tile_max; - auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch); + const auto& regs = liverpool->regs; + const u32 num_indices = SetupIndexBuffer(is_indexed); + const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + const GraphicsPipeline* pipeline =
pipeline_cache.GetPipeline(); + pipeline->BindResources(memory); const vk::RenderingAttachmentInfo color_info = { .imageView = *image_view.image_view, @@ -61,13 +54,50 @@ void Rasterizer::DrawIndex() { .pColorAttachments = &color_info, }; + UpdateDynamicState(); + cmdbuf.beginRendering(rendering_info); - cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); - cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); - cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + if (is_indexed) { + cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); + } else { + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + } cmdbuf.endRendering(); } +u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { + // Emulate QuadList primitive type with CPU made index buffer. + const auto& regs = liverpool->regs; + if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) { + ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw"); + is_indexed = true; + + // Emit indices. + const u32 index_size = 3 * regs.num_indices; + const auto [data, offset, _] = vertex_index_buffer.Map(index_size); + LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); + vertex_index_buffer.Commit(index_size); + + // Bind index buffer. + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16); + return index_size / sizeof(u16); + } + if (!is_indexed) { + return 0; + } + + const VAddr index_address = regs.index_base_address.Address(); + const auto [buffer, offset] = memory->GetVulkanBuffer(index_address); + const vk::IndexType index_type = + regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16 + : vk::IndexType::eUint32; + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(buffer, offset, index_type); + return regs.num_indices; +} + void Rasterizer::UpdateDynamicState() { UpdateViewportScissorState(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ba3c2d3a..a8386c25 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -3,7 +3,6 @@ #pragma once -#include #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" @@ -11,6 +10,10 @@ namespace AmdGpu { struct Liverpool; } +namespace Core { +class MemoryManager; +} + namespace VideoCore { class TextureCache; } @@ -26,17 +29,14 @@ public: VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); ~Rasterizer(); - /// Performs a draw call with an index buffer. - void DrawIndex(); - - /// Updates graphics state that is not part of the bound pipeline. - void UpdateDynamicState(); + void Draw(bool is_indexed); private: - /// Updates viewport and scissor from liverpool registers. - void UpdateViewportScissorState(); + u32 SetupIndexBuffer(bool& is_indexed); + void MapMemory(VAddr addr, size_t size); - /// Updates depth and stencil pipeline state from liverpool registers. 
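For reference, the QuadList path above turns every 4 input vertices into 6 indices (two triangles), so the staging requirement is num_indices * 6 / 4 u16 entries, i.e. 3 * num_indices bytes, which is exactly the index_size computed in SetupIndexBuffer. A minimal standalone check of that expansion follows, using a local copy of the emitter from liverpool_to_vk.cpp; the u8/u16/u32 aliases stand in for the project's fixed-width typedefs from common/types.h.

#include <array>
#include <cassert>
#include <cstdint>

using u8 = std::uint8_t;
using u16 = std::uint16_t;
using u32 = std::uint32_t;

// Same expansion as Vulkan::LiverpoolToVK::EmitQuadToTriangleListIndices:
// quad {i, i+1, i+2, i+3} becomes triangles {i, i+1, i+2} and {i+2, i, i+3}.
static void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
    u16* out_data = reinterpret_cast<u16*>(out_ptr);
    for (u16 i = 0; i < num_vertices; i += 4) {
        *out_data++ = i;
        *out_data++ = i + 1;
        *out_data++ = i + 2;
        *out_data++ = i + 2;
        *out_data++ = i;
        *out_data++ = i + 3;
    }
}

int main() {
    constexpr u32 num_vertices = 8; // two quads
    std::array<u16, num_vertices * 6 / 4> indices{};
    EmitQuadToTriangleListIndices(reinterpret_cast<u8*>(indices.data()), num_vertices);
    const std::array<u16, 12> expected{0, 1, 2, 2, 0, 3, 4, 5, 6, 6, 4, 7};
    assert(indices == expected);
    // 3 * num_vertices bytes of index data: 24 bytes, 12 u16 indices here.
    static_assert(sizeof(indices) == 3 * num_vertices);
    return 0;
}

Two quads (8 vertices) therefore produce the 12 indices {0,1,2, 2,0,3, 4,5,6, 6,4,7} in 24 bytes, which matches what the Map(index_size)/Commit(index_size) pair above reserves in the stream buffer before binding it as a Uint16 index buffer.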
+ void UpdateDynamicState(); + void UpdateViewportScissorState(); void UpdateDepthStencilState(); private: @@ -44,6 +44,7 @@ private: Scheduler& scheduler; VideoCore::TextureCache& texture_cache; AmdGpu::Liverpool* liverpool; + Core::MemoryManager* memory; PipelineCache pipeline_cache; StreamBuffer vertex_index_buffer; }; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index d31a1f5d..637f03d0 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,7 +35,7 @@ public: * @param size Size to reserve. * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::tuple Map(u64 size, u64 alignment = 0); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Commit(u64 size); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index b78d2563..6a7bba8e 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/config.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/image.h" @@ -65,6 +66,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } } +ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { + is_tiled = true; + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); + type = vk::ImageType::e2D; + size.width = buffer.Pitch(); + size.height = buffer.Height(); + pitch = size.width; + guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1); +} + UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) : device{device_}, allocator{allocator_} {} diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c1bddec7..92391fde 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -6,6 +6,7 @@ #include "common/enum.h" #include "common/types.h" #include "core/libraries/videoout/buffer.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/types.h" @@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) struct ImageInfo { ImageInfo() = default; explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept; bool is_tiled = false; vk::Format pixel_format = vk::Format::eUndefined; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 15679ba9..e21bb6ed 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -101,8 +101,8 @@ TextureCache::~TextureCache() { } void TextureCache::OnCpuWrite(VAddr address) { - const VAddr address_aligned = address & ~((1 << PageBits) - 1); - ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) { + const VAddr address_aligned = address & ~((1 << PageShift) - 1); + ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) { // Ensure image is 
reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. @@ -137,26 +137,20 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { return image; } -ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) { - boost::container::small_vector image_ids; - ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) { - if (image.cpu_addr == cpu_address) { - image_ids.push_back(image_id); - } - }); +ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) { + const ImageInfo info{buffer}; + auto& image = FindImage(info, buffer.Address()); - ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!"); - auto* image = &slot_images[image_ids.empty() ? ImageId{0} : image_ids.back()]; - - ImageViewInfo info; - info.format = vk::Format::eB8G8R8A8Srgb; - if (const ImageViewId view_id = image->FindView(info); view_id) { + ImageViewInfo view_info; + view_info.format = info.pixel_format; + if (const ImageViewId view_id = image.FindView(view_info); view_id) { return slot_image_views[view_id]; } - const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image); - image->image_view_infos.emplace_back(info); - image->image_view_ids.emplace_back(view_id); + const ImageViewId view_id = + slot_image_views.insert(instance, scheduler, view_info, image.image); + image.image_view_infos.emplace_back(view_info); + image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; } @@ -225,13 +219,13 @@ void TextureCache::UnregisterImage(ImageId image_id) { ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { - ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift); return; } auto& image_ids = page_it.value(); const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { - ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift); return; } image_ids.erase(vector_it); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a11201c4..f59f16c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -37,7 +37,7 @@ public: Image& FindImage(const ImageInfo& info, VAddr cpu_address); /// Retrieves the render target with specified properties - ImageView& RenderTarget(VAddr cpu_address, u32 pitch); + ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer); /// Reuploads image contents. void RefreshImage(Image& image);
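One practical consequence of the fmt::formatter specialization added in pixel_format.h is that AmdGpu::NumberFormat values can be passed directly to fmt-based logging, for example when reporting an unsupported data/number format combination. A self-contained illustration of the same pattern is sketched below; the trimmed NumberFormat enum and NameOf are stand-ins for the real definitions in pixel_format.h, and plain fmt::print stands in for the emulator's LOG_* macros.

#include <string_view>
#include <fmt/format.h>

namespace AmdGpu {
// Stand-in enum with only two of the real enumerators.
enum class NumberFormat { Unorm, Srgb };

constexpr std::string_view NameOf(NumberFormat fmt) {
    switch (fmt) {
    case NumberFormat::Unorm:
        return "Unorm";
    case NumberFormat::Srgb:
        return "Srgb";
    default:
        return "Unknown";
    }
}
} // namespace AmdGpu

// Same shape as the specialization in pixel_format.h: forward to NameOf().
template <>
struct fmt::formatter<AmdGpu::NumberFormat> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const {
        return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
    }
};

int main() {
    // Prints "Unsupported num_format Srgb".
    fmt::print("Unsupported num_format {}\n", AmdGpu::NumberFormat::Srgb);
}

This is the usual fmt idiom: the specialization forwards to NameOf, so each new enumerator only needs one extra case in the switch to become printable everywhere.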