video_core: Bringup some basic functionality (#145)

* video_core: Remove hack in rasterizer

* The hack was to skip the first draw as the display buffer had not been created yet and the texture cache couldn't create one itself. With this patch it now can, using the color buffer parameters from registers

* shader_recompiler: Implement attribute loads/stores

* video_core: Add basic vertex, index buffer handling and pipeline caching

* externals: Make xxhash lowercase
This commit is contained in:
TheTurtle 2024-05-25 15:33:15 +03:00 committed by GitHub
parent e9f64bb76c
commit 3c90b8ac00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
50 changed files with 1030 additions and 383 deletions

6
.gitmodules vendored
View File

@ -50,12 +50,12 @@
[submodule "externals/toml11"] [submodule "externals/toml11"]
path = externals/toml11 path = externals/toml11
url = https://github.com/ToruNiina/toml11.git url = https://github.com/ToruNiina/toml11.git
[submodule "externals/xxHash"]
path = externals/xxHash
url = https://github.com/Cyan4973/xxHash.git
[submodule "externals/zydis"] [submodule "externals/zydis"]
path = externals/zydis path = externals/zydis
url = https://github.com/zyantific/zydis.git url = https://github.com/zyantific/zydis.git
[submodule "externals/sirit"] [submodule "externals/sirit"]
path = externals/sirit path = externals/sirit
url = https://github.com/raphaelthegreat/sirit url = https://github.com/raphaelthegreat/sirit
[submodule "externals/xxhash"]
path = externals/xxhash
url = https://github.com/Cyan4973/xxHash.git

View File

@ -324,6 +324,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/frontend/control_flow_graph.h src/shader_recompiler/frontend/control_flow_graph.h
src/shader_recompiler/frontend/decode.cpp src/shader_recompiler/frontend/decode.cpp
src/shader_recompiler/frontend/decode.h src/shader_recompiler/frontend/decode.h
src/shader_recompiler/frontend/fetch_shader.cpp
src/shader_recompiler/frontend/fetch_shader.h
src/shader_recompiler/frontend/format.cpp src/shader_recompiler/frontend/format.cpp
src/shader_recompiler/frontend/instruction.cpp src/shader_recompiler/frontend/instruction.cpp
src/shader_recompiler/frontend/instruction.h src/shader_recompiler/frontend/instruction.h
@ -333,6 +335,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/constant_propogation_pass.cpp src/shader_recompiler/ir/passes/constant_propogation_pass.cpp
src/shader_recompiler/ir/passes/info_collection.cpp
src/shader_recompiler/ir/passes/passes.h src/shader_recompiler/ir/passes/passes.h
src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/abstract_syntax_list.h
src/shader_recompiler/ir/attribute.cpp src/shader_recompiler/ir/attribute.cpp

View File

@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL)
add_subdirectory(toml11 EXCLUDE_FROM_ALL) add_subdirectory(toml11 EXCLUDE_FROM_ALL)
# xxHash # xxHash
add_library(xxhash INTERFACE) add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c)
target_include_directories(xxhash INTERFACE xxhash) target_include_directories(xxhash PUBLIC xxhash)
# Zydis # Zydis
option(ZYDIS_BUILD_TOOLS "" OFF) option(ZYDIS_BUILD_TOOLS "" OFF)

View File

@ -7,6 +7,7 @@
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Core { namespace Core {
@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
new_vma.prot = prot; new_vma.prot = prot;
new_vma.name = name; new_vma.name = name;
new_vma.type = type; new_vma.type = type;
if (type == VMAType::Direct) {
MapVulkanMemory(mapped_addr, size);
}
}; };
// When virtual addr is zero let the address space manager pick the address. // When virtual addr is zero let the address space manager pick the address.
@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr,
"Attempting to unmap partially mapped range"); "Attempting to unmap partially mapped range");
if (it->second.type == VMAType::Direct) {
UnmapVulkanMemory(virtual_addr, size);
}
// Mark region as free and attempt to coalesce it with neighbours. // Mark region as free and attempt to coalesce it with neighbours.
auto& vma = it->second; auto& vma = it->second;
vma.type = VMAType::Free; vma.type = VMAType::Free;
@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
impl.Unmap(virtual_addr, size); impl.Unmap(virtual_addr, size);
} }
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
auto it = mapped_memories.upper_bound(addr);
it = std::prev(it);
ASSERT(it != mapped_memories.end() && it->first <= addr);
return std::make_pair(*it->second.buffer, addr - it->first);
}
VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
auto vma_handle = FindVMA(virtual_addr); auto vma_handle = FindVMA(virtual_addr);
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
return iter; return iter;
} }
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
const vk::Device device = instance->GetDevice();
const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
void* host_pointer = reinterpret_cast<void*>(addr);
const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT(
vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer);
ASSERT(host_mem_props.memoryTypeBits != 0);
int mapped_memory_type = -1;
auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) {
u32 host_mem_types = host_mem_props.memoryTypeBits;
while (host_mem_types != 0) {
// Try to find a cached memory type
mapped_memory_type = std::countr_zero(host_mem_types);
host_mem_types -= (1 << mapped_memory_type);
if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) {
return;
}
}
mapped_memory_type = -1;
};
// First try to find a memory that is both coherent and cached
find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent |
vk::MemoryPropertyFlagBits::eHostCached);
if (mapped_memory_type == -1)
// Then only coherent (lower performance)
find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent);
if (mapped_memory_type == -1) {
LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping");
mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits);
}
const vk::StructureChain alloc_info = {
vk::MemoryAllocateInfo{
.allocationSize = size,
.memoryTypeIndex = static_cast<uint32_t>(mapped_memory_type),
},
vk::ImportMemoryHostPointerInfoEXT{
.handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
.pHostPointer = host_pointer,
},
};
const auto [it, new_memory] = mapped_memories.try_emplace(addr);
ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory");
auto& memory = it->second;
memory.backing = device.allocateMemoryUnique(alloc_info.get());
constexpr vk::BufferUsageFlags MapFlags =
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eUniformBuffer;
const vk::StructureChain buffer_info = {
vk::BufferCreateInfo{
.size = size,
.usage = MapFlags,
.sharingMode = vk::SharingMode::eExclusive,
},
vk::ExternalMemoryBufferCreateInfoKHR{
.handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
}};
memory.buffer = device.createBufferUnique(buffer_info.get());
device.bindBufferMemory(*memory.buffer, *memory.backing, 0);
}
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
const auto it = mapped_memories.find(addr);
ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
mapped_memories.erase(it);
}
} // namespace Core } // namespace Core

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <functional>
#include <string_view> #include <string_view>
#include <vector> #include <vector>
#include <boost/icl/split_interval_map.hpp> #include <boost/icl/split_interval_map.hpp>
@ -10,6 +11,11 @@
#include "common/singleton.h" #include "common/singleton.h"
#include "common/types.h" #include "common/types.h"
#include "core/address_space.h" #include "core/address_space.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
}
namespace Core { namespace Core {
@ -86,6 +92,10 @@ public:
explicit MemoryManager(); explicit MemoryManager();
~MemoryManager(); ~MemoryManager();
void SetInstance(const Vulkan::Instance* instance_) {
instance = instance_;
}
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type); int memory_type);
@ -97,11 +107,9 @@ public:
void UnmapMemory(VAddr virtual_addr, size_t size); void UnmapMemory(VAddr virtual_addr, size_t size);
private: std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
bool HasOverlap(VAddr addr, size_t size) const {
return vma_map.find(addr) != vma_map.end();
}
private:
VMAHandle FindVMA(VAddr target) { VMAHandle FindVMA(VAddr target) {
// Return first the VMA with base >= target. // Return first the VMA with base >= target.
const auto it = vma_map.lower_bound(target); const auto it = vma_map.lower_bound(target);
@ -117,10 +125,22 @@ private:
VMAHandle MergeAdjacent(VMAHandle iter); VMAHandle MergeAdjacent(VMAHandle iter);
void MapVulkanMemory(VAddr addr, size_t size);
void UnmapVulkanMemory(VAddr addr, size_t size);
private: private:
AddressSpace impl; AddressSpace impl;
std::vector<DirectMemoryArea> allocations; std::vector<DirectMemoryArea> allocations;
VMAMap vma_map; VMAMap vma_map;
struct MappedMemory {
vk::UniqueBuffer buffer;
vk::UniqueDeviceMemory backing;
size_t buffer_size;
};
std::map<VAddr, MappedMemory> mapped_memories;
const Vulkan::Instance* instance{};
}; };
using Memory = Common::Singleton<MemoryManager>; using Memory = Common::Singleton<MemoryManager>;

View File

@ -20,7 +20,6 @@
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/libraries/videoout/video_out.h" #include "core/libraries/videoout/video_out.h"
#include "core/linker.h" #include "core/linker.h"
#include "core/tls.h"
#include "input/controller.h" #include "input/controller.h"
#include "sdl_window.h" #include "sdl_window.h"

View File

@ -171,7 +171,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{}; spv::ExecutionModel execution_model{};
switch (program.stage) { switch (program.info.stage) {
case Stage::Compute: { case Stage::Compute: {
// const std::array<u32, 3> workgroup_size{program.workgroup_size}; // const std::array<u32, 3> workgroup_size{program.workgroup_size};
// execution_model = spv::ExecutionModel::GLCompute; // execution_model = spv::ExecutionModel::GLCompute;
@ -194,7 +194,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
// } // }
break; break;
default: default:
throw NotImplementedException("Stage {}", u32(program.stage)); throw NotImplementedException("Stage {}", u32(program.info.stage));
} }
ctx.AddEntryPoint(execution_model, main, "main", interfaces); ctx.AddEntryPoint(execution_model, main, "main", interfaces);
} }
@ -222,7 +222,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Binding
EmitContext ctx{profile, program, bindings}; EmitContext ctx{profile, program, bindings};
const Id main{DefineMain(ctx, program)}; const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main); DefineEntryPoint(program, ctx, main);
if (program.stage == Stage::Vertex) { if (program.info.stage == Stage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters); ctx.AddCapability(spv::Capability::DrawParameters);
} }

View File

@ -10,12 +10,11 @@ namespace {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) { if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index).at(element)}; const auto& info{ctx.output_params.at(index)};
if (info.num_components == 1) { if (info.num_components == 1) {
return info.id; return info.id;
} else { } else {
const u32 index_element{element - info.first_element}; return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element));
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element));
} }
} }
switch (attr) { switch (attr) {
@ -68,22 +67,21 @@ Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR::
throw LogicError("Unreachable instruction"); throw LogicError("Unreachable instruction");
} }
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
const u32 element{static_cast<u32>(attr) % 4};
if (IR::IsParam(attr)) { if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)}; const auto& param{ctx.input_params.at(index)};
if (!ValidId(param.id)) { if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written // Attribute is disabled or varying component is not written
return ctx.ConstF32(element == 3 ? 1.0f : 0.0f); return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
} }
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))}; const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer); return ctx.OpLoad(param.component_type, pointer);
} }
throw NotImplementedException("Read attribute {}", attr); throw NotImplementedException("Read attribute {}", attr);
} }
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
switch (attr) { switch (attr) {
case IR::Attribute::VertexId: case IR::Attribute::VertexId:
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
@ -93,9 +91,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
} }
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Param0) {
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)}; const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, value); ctx.OpStore(pointer, value);
} }

View File

@ -46,9 +46,9 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu
const IR::Value& offset); const IR::Value& offset);
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
const IR::Value& offset); const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value);

View File

@ -36,7 +36,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace } // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} {
u32& uniform_binding{bindings.unified}; u32& uniform_binding{bindings.unified};
u32& storage_binding{bindings.unified}; u32& storage_binding{bindings.unified};
u32& texture_binding{bindings.unified}; u32& texture_binding{bindings.unified};
@ -98,6 +98,10 @@ void EmitContext::DefineArithmeticTypes() {
u32_zero_value = ConstU32(0U); u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f); f32_zero_value = ConstF32(0.0f);
input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32");
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
} }
@ -107,26 +111,123 @@ void EmitContext::DefineInterfaces(const IR::Program& program) {
DefineOutputs(program); DefineOutputs(program);
} }
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
case AmdGpu::NumberFormat::Uint:
return ctx.U32[4];
case AmdGpu::NumberFormat::Sscaled:
return ctx.F32[4];
case AmdGpu::NumberFormat::Uscaled:
return ctx.F32[4];
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uint:
return {id, input_u32, U32[1], 4};
case AmdGpu::NumberFormat::Sint:
return {id, input_s32, S32[1], 4};
case AmdGpu::NumberFormat::Sscaled:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uscaled:
return {id, input_f32, F32[1], 4};
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) {
case 0:
return ctx.ConstF32(0.f, 0.f, 0.f, 0.f);
case 1:
return ctx.ConstF32(0.f, 0.f, 0.f, 1.f);
case 2:
return ctx.ConstF32(1.f, 1.f, 1.f, 0.f);
case 3:
return ctx.ConstF32(1.f, 1.f, 1.f, 1.f);
default:
UNREACHABLE();
}
}
void EmitContext::DefineInputs(const IR::Program& program) { void EmitContext::DefineInputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) { switch (stage) {
case Stage::Vertex: case Stage::Vertex:
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
}
break; break;
case Stage::Fragment:
for (const auto& input : info.ps_inputs) {
if (input.is_default) {
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value),
input_f32, F32[1]};
continue;
}
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
const u32 num_components = info.loads.NumComponents(param);
const Id type{F32[num_components]};
const Id id{DefineInput(type, input.semantic)};
if (input.is_flat) {
Decorate(id, spv::Decoration::Flat);
}
Name(id, fmt::format("fs_in_attr{}", input.semantic));
input_params[input.semantic] = {id, input_f32, F32[1], num_components};
interfaces.push_back(id);
}
default: default:
break; break;
} }
} }
void EmitContext::DefineOutputs(const IR::Program& program) { void EmitContext::DefineOutputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) { switch (stage) {
case Stage::Vertex: case Stage::Vertex:
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], num_components};
interfaces.push_back(id);
}
break; break;
case Stage::Fragment: case Stage::Fragment:
frag_color[0] = DefineOutput(F32[4], 0); for (u32 i = 0; i < IR::NumRenderTargets; i++) {
Name(frag_color[0], fmt::format("frag_color{}", 0)); const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
interfaces.push_back(frag_color[0]); if (!info.stores.GetAny(mrt)) {
continue;
}
frag_color[i] = DefineOutput(F32[4], i);
Name(frag_color[i], fmt::format("frag_color{}", i));
interfaces.push_back(frag_color[i]);
}
break; break;
default: default:
break; break;

View File

@ -135,6 +135,9 @@ public:
Id u32_zero_value{}; Id u32_zero_value{};
Id f32_zero_value{}; Id f32_zero_value{};
Id input_u32{};
Id input_f32{};
Id input_s32{};
Id output_u32{}; Id output_u32{};
Id output_f32{}; Id output_f32{};
@ -145,25 +148,22 @@ public:
Id base_vertex{}; Id base_vertex{};
std::array<Id, 8> frag_color{}; std::array<Id, 8> frag_color{};
struct InputParamInfo { struct SpirvAttribute {
Id id; Id id;
Id pointer_type; Id pointer_type;
Id component_type; Id component_type;
u32 num_components;
}; };
std::array<InputParamInfo, 32> input_params{}; std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};
struct ParamElementInfo {
Id id{};
u32 first_element{};
u32 num_components{};
};
std::array<std::array<ParamElementInfo, 4>, 32> output_params{};
private: private:
void DefineArithmeticTypes(); void DefineArithmeticTypes();
void DefineInterfaces(const IR::Program& program); void DefineInterfaces(const IR::Program& program);
void DefineInputs(const IR::Program& program); void DefineInputs(const IR::Program& program);
void DefineOutputs(const IR::Program& program); void DefineOutputs(const IR::Program& program);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
}; };
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/static_vector.hpp>
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h"
namespace Shader::Gcn {
/**
* s_load_dwordx4 s[8:11], s[2:3], 0x00
* s_load_dwordx4 s[12:15], s[2:3], 0x04
* s_load_dwordx4 s[16:19], s[2:3], 0x08
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen
* buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen
* buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen
* s_waitcnt 0
* s_setpc_b64 s[0:1]
* s_load_dwordx4 s[4:7], s[2:3], 0x0
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen
* s_load_dwordx4 s[4:7], s[2:3], 0x8
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen
* s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
* s_setpc_b64 s[0:1]
* A normal fetch shader looks like the above, the instructions are generated
* using input semantics on cpu side. Load instructions can either be separate or interleaved
* We take the reverse way, extract the original input semantics from these instructions.
**/
std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
struct VsharpLoad {
u32 dword_offset{};
s32 base_sgpr{};
s32 dst_reg{-1};
};
boost::container::static_vector<VsharpLoad, 16> loads;
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice);
if (inst.opcode == Opcode::S_SETPC_B64) {
break;
}
if (inst.inst_class == InstClass::ScalarMemRd) {
loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code);
continue;
}
if (inst.inst_class == InstClass::VectorMemBufFmt) {
// SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs
const u32 base_sgpr = inst.src[2].code * 4;
// Find the load instruction that loaded the V# to the SPGR.
// This is so we can determine its index in the vertex table.
const auto it = std::ranges::find_if(
loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
auto& attrib = attributes.emplace_back();
attrib.semantic = semantic_index++;
attrib.dest_vgpr = inst.src[1].code;
attrib.num_elements = inst.control.mubuf.count;
attrib.sgpr_base = it->base_sgpr;
attrib.dword_offset = it->dword_offset;
// Mark load as used.
it->dst_reg = -1;
}
}
return attributes;
}
} // namespace Shader::Gcn

View File

@ -0,0 +1,21 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/types.h"
namespace Shader::Gcn {
struct VertexAttribute {
u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
};
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
} // namespace Shader::Gcn

View File

@ -600,9 +600,9 @@ public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt, ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_, IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Stage stage_) Info& info_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} { syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
Visit(root_stmt, nullptr, nullptr); Visit(root_stmt, nullptr, nullptr);
IR::Block& first_block{*syntax_list.front().data.block}; IR::Block& first_block{*syntax_list.front().data.block};
@ -633,8 +633,7 @@ private:
ensure_block(); ensure_block();
const u32 start = stmt.block->begin_index; const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1; const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stage, inst_list.subspan(start, size)); Translate(current_block, inst_list.subspan(start, size), info);
fmt::print("{}\n", IR::DumpBlock(*current_block));
break; break;
} }
case StatementType::SetVariable: { case StatementType::SetVariable: {
@ -812,17 +811,17 @@ private:
IR::AbstractSyntaxList& syntax_list; IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{}; const Block dummy_flow_block{};
std::span<const GcnInst> inst_list; std::span<const GcnInst> inst_list;
Stage stage; Info& info;
}; };
} // Anonymous namespace } // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Stage stage) { CFG& cfg, Info& info) {
ObjectPool<Statement> stmt_pool{64}; ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool}; GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()}; Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list; IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage}; TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
return syntax_list; return syntax_list;
} }

View File

@ -10,13 +10,13 @@
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
namespace Shader { namespace Shader {
enum class Stage : u32; struct Info;
} }
namespace Shader::Gcn { namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg, ObjectPool<IR::Block>& block_pool, CFG& cfg,
Stage stage); Info& info);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Gcn { namespace Shader::Gcn {
Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} { Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} {
IR::VectorReg dst_vreg = IR::VectorReg::V0; IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (stage) { switch (info.stage) {
case Stage::Vertex: case Stage::Vertex:
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23 // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
@ -92,11 +94,39 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
} }
} }
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list) { void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code;
const u32* code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
for (u32 i = 0; i < attrib.num_elements; i++) {
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
}
// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
});
}
}
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) { if (inst_list.empty()) {
return; return;
} }
Translator translator{block, stage}; Translator translator{block, info};
for (const auto& inst : inst_list) { for (const auto& inst : inst_list) {
switch (inst.opcode) { switch (inst.opcode) {
case Opcode::S_MOV_B32: case Opcode::S_MOV_B32:
@ -115,6 +145,9 @@ void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list
translator.V_MUL_F32(inst); translator.V_MUL_F32(inst);
break; break;
case Opcode::S_SWAPPC_B64: case Opcode::S_SWAPPC_B64:
ASSERT(info.stage == Stage::Vertex);
translator.EmitFetch(inst);
break;
case Opcode::S_WAITCNT: case Opcode::S_WAITCNT:
break; // Ignore for now. break; // Ignore for now.
case Opcode::S_BUFFER_LOAD_DWORDX16: case Opcode::S_BUFFER_LOAD_DWORDX16:

View File

@ -7,9 +7,10 @@
#include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader { namespace Shader {
enum class Stage : u32; struct Info;
} }
namespace Shader::Gcn { namespace Shader::Gcn {
@ -25,7 +26,9 @@ enum class ConditionOp : u32 {
class Translator { class Translator {
public: public:
explicit Translator(IR::Block* block_, Stage stage); explicit Translator(IR::Block* block_, Info& info);
void EmitFetch(const GcnInst& inst);
// Scalar ALU // Scalar ALU
void S_MOV(const GcnInst& inst); void S_MOV(const GcnInst& inst);
@ -66,8 +69,9 @@ private:
private: private:
IR::Block* block; IR::Block* block;
IR::IREmitter ir; IR::IREmitter ir;
Info& info;
}; };
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list); void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -20,9 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) {
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])), const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
ir.FPConvert(16, GetSrc(inst.src[1]))); ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32));
} }
void Translator::V_MUL_F32(const GcnInst& inst) { void Translator::V_MUL_F32(const GcnInst& inst) {

View File

@ -7,7 +7,9 @@ namespace Shader::Gcn {
void Translator::V_INTERP_P2_F32(const GcnInst& inst) { void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr}; auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
attr.semantic = inst.control.vintrp.attr;
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
} }

View File

@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) {
return "Param31"; return "Param31";
case Attribute::VertexId: case Attribute::VertexId:
return "VertexId"; return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::FragCoord:
return "FragCoord";
default: default:
break; break;
} }

View File

@ -72,10 +72,12 @@ enum class Attribute : u64 {
LocalInvocationId = 75, LocalInvocationId = 75,
LocalInvocationIndex = 76, LocalInvocationIndex = 76,
FragCoord = 77, FragCoord = 77,
Max,
}; };
constexpr size_t EXP_NUM_POS = 4; constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t EXP_NUM_PARAM = 32; constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32;
[[nodiscard]] bool IsParam(Attribute attribute) noexcept; [[nodiscard]] bool IsParam(Attribute attribute) noexcept;
@ -86,7 +88,7 @@ constexpr size_t EXP_NUM_PARAM = 32;
if (result > static_cast<int>(Attribute::Param31)) { if (result > static_cast<int>(Attribute::Param31)) {
throw LogicError("Overflow on register arithmetic"); throw LogicError("Overflow on register arithmetic");
} }
if (result < static_cast<int>(Attribute::Param0)) { if (result < static_cast<int>(Attribute::RenderTarget0)) {
throw LogicError("Underflow on register arithmetic"); throw LogicError("Underflow on register arithmetic");
} }
return static_cast<Attribute>(result); return static_cast<Attribute>(result);

View File

@ -174,18 +174,10 @@ void IREmitter::SetVcc(const U1& value) {
Inst(Opcode::SetVcc, value); Inst(Opcode::SetVcc, value);
} }
F32 IREmitter::GetAttribute(IR::Attribute attribute) {
return GetAttribute(attribute, 0);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp)); return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
} }
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
return GetAttributeU32(attribute, 0);
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
return Inst<U32>(Opcode::GetAttributeU32, attribute, Imm32(comp)); return Inst<U32>(Opcode::GetAttributeU32, attribute, Imm32(comp));
} }

View File

@ -58,11 +58,9 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond); [[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute); [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp); [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp);
void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp);
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset); [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset);

View File

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32: {
info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
break;
}
case IR::Opcode::SetAttribute: {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
default:
break;
}
}
void CollectShaderInfoPass(IR::Program& program) {
Info& info{program.info};
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
Visit(info, inst);
}
}
}
} // namespace Shader::Optimization

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization { namespace Shader::Optimization {
@ -11,6 +12,7 @@ void SsaRewritePass(IR::BlockList& program);
void IdentityRemovalPass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::BlockList& program);
void ConstantPropagationPass(IR::BlockList& program); void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::BlockList& program); void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
} // namespace Shader::Optimization } // namespace Shader::Optimization

View File

@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) {
}; };
} }
void ResourceTrackingPass(IR::BlockList& program) { void ResourceTrackingPass(IR::Program& program) {
for (IR::Block* const block : program) { for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
if (!IsResourceInstruction(inst)) { if (!IsResourceInstruction(inst)) {
continue; continue;
} }
printf("ff\n");
IR::Inst* producer = inst.Arg(0).InstRecursive(); IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto loc = TrackSharp(producer->Arg(0)); const auto loc = TrackSharp(producer->Arg(0));
fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords, fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,

View File

@ -3,15 +3,11 @@
#pragma once #pragma once
#include <array>
#include <string> #include <string>
#include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader {
enum class Stage : u32;
}
namespace Shader::IR { namespace Shader::IR {
@ -20,7 +16,7 @@ struct Program {
BlockList blocks; BlockList blocks;
BlockList post_order_blocks; BlockList post_order_blocks;
std::vector<Gcn::GcnInst> ins_list; std::vector<Gcn::GcnInst> ins_list;
Stage stage; Info info;
}; };
[[nodiscard]] std::string DumpProgram(const Program& program); [[nodiscard]] std::string DumpProgram(const Program& program);

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream> #include <fstream>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h" #include "shader_recompiler/frontend/structured_control_flow.h"
@ -30,9 +29,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks; return blocks;
} }
std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool, IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
ObjectPool<IR::Block>& block_pool, Stage stage, std::span<const u32> token, const Info&& info) {
std::span<const u32> token) {
// Ensure first instruction is expected. // Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF; constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -40,6 +38,11 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
Gcn::GcnDecodeContext decoder; Gcn::GcnDecodeContext decoder;
static int counter = 0;
std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary);
file.write((const char*)token.data(), token.size_bytes());
file.close();
// Decode and save instructions // Decode and save instructions
IR::Program program; IR::Program program;
program.ins_list.reserve(token.size()); program.ins_list.reserve(token.size());
@ -52,21 +55,24 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::CFG cfg{gcn_block_pool, program.ins_list}; Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program. // Structurize control flow graph and create program.
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage); program.info = std::move(info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.blocks = GenerateBlocks(program.syntax_list); program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
program.stage = stage;
// Run optimization passes // Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::IdentityRemovalPass(program.blocks);
// Shader::Optimization::ResourceTrackingPass(program.post_order_blocks); Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::DeadCodeEliminationPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program.blocks);
Shader::Optimization::CollectShaderInfoPass(program);
// TODO: Pass profile from vulkan backend for (const auto& block : program.blocks) {
const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program); fmt::print("{}\n", IR::DumpBlock(*block));
return code; }
return program;
} }
} // namespace Shader } // namespace Shader

View File

@ -3,7 +3,9 @@
#pragma once #pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader { namespace Shader {
@ -26,8 +28,8 @@ struct BinaryInfo {
u32 crc32; u32 crc32;
}; };
[[nodiscard]] std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage, ObjectPool<IR::Block>& block_pool,
std::span<const u32> code); std::span<const u32> code, const Info&& info);
} // namespace Shader } // namespace Shader

View File

@ -3,39 +3,16 @@
#pragma once #pragma once
#include <array> #include <span>
#include <boost/container/small_vector.hpp> #include <boost/container/static_vector.hpp>
#include "shader_recompiler/ir/type.h" #include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/ir/attribute.h"
#include "video_core/amdgpu/pixel_format.h"
namespace Shader { namespace Shader {
enum class AttributeType : u8 { static constexpr size_t NumUserDataRegs = 16;
Float,
SignedInt,
UnsignedInt,
SignedScaled,
UnsignedScaled,
Disabled,
};
enum class InputTopology {
Points,
Lines,
LinesAdjacency,
Triangles,
TrianglesAdjacency,
};
enum class CompareFunction {
Never,
Less,
Equal,
LessThanEqual,
Greater,
NotEqual,
GreaterThanEqual,
Always,
};
enum class Stage : u32 { enum class Stage : u32 {
Vertex, Vertex,
@ -62,78 +39,64 @@ enum class TextureType : u32 {
}; };
constexpr u32 NUM_TEXTURE_TYPES = 7; constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class Interpolation {
Smooth,
Flat,
NoPerspective,
};
struct ConstantBufferDescriptor {
u32 index;
u32 count;
auto operator<=>(const ConstantBufferDescriptor&) const = default;
};
struct TextureDescriptor {
TextureType type;
bool is_eud;
bool is_depth;
bool is_multisample;
bool is_storage;
u32 count;
u32 eud_offset_dwords;
u32 ud_index_dwords;
auto operator<=>(const TextureDescriptor&) const = default;
};
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
struct Info { struct Info {
bool uses_workgroup_id{}; struct VsInput {
bool uses_local_invocation_id{}; AmdGpu::NumberFormat fmt;
bool uses_invocation_id{}; u16 binding;
bool uses_invocation_info{}; u16 num_components;
bool uses_sample_id{}; u8 sgpr_base;
u8 dword_offset;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};
std::array<Interpolation, 32> interpolation{}; struct PsInput {
// VaryingState loads; u32 param_index;
// VaryingState stores; u32 semantic;
// VaryingState passthrough; bool is_default;
bool is_flat;
u32 default_value;
};
boost::container::static_vector<PsInput, 32> ps_inputs{};
std::array<bool, 8> stores_frag_color{}; struct AttributeFlags {
bool stores_sample_mask{}; bool Get(IR::Attribute attrib, u32 comp = 0) const {
bool stores_frag_depth{}; return flags[Index(attrib)] & (1 << comp);
}
bool uses_fp16{}; bool GetAny(IR::Attribute attrib) const {
bool uses_fp64{}; return flags[Index(attrib)];
bool uses_fp16_denorms_flush{}; }
bool uses_fp16_denorms_preserve{};
bool uses_fp32_denorms_flush{};
bool uses_fp32_denorms_preserve{};
bool uses_int8{};
bool uses_int16{};
bool uses_int64{};
bool uses_image_1d{};
bool uses_sampled_1d{};
bool uses_subgroup_vote{};
bool uses_subgroup_mask{};
bool uses_derivatives{};
IR::Type used_constant_buffer_types{}; void Set(IR::Attribute attrib, u32 comp = 0) {
IR::Type used_storage_buffer_types{}; flags[Index(attrib)] |= (1 << comp);
IR::Type used_indirect_cbuf_types{}; }
// std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{}; u32 NumComponents(IR::Attribute attrib) const {
u32 used_clip_distances{}; const u8 mask = flags[Index(attrib)];
ASSERT(mask != 0b1011 || mask != 0b1101);
return std::popcount(mask);
}
// boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> static size_t Index(IR::Attribute attrib) {
// constant_buffer_descriptors; return static_cast<size_t>(attrib);
// boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> }
// storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors;
// ImageBufferDescriptors image_buffer_descriptors; std::array<u8, IR::NumAttributes> flags;
// TextureDescriptors texture_descriptors; };
// ImageDescriptors image_descriptors; AttributeFlags loads{};
AttributeFlags stores{};
std::span<const u32> user_data;
Stage stage;
template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
T data;
u32* base;
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
std::memcpy(&data, base + dword_offset, sizeof(T));
return data;
}
}; };
} // namespace Shader } // namespace Shader

View File

@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) { if (rasterizer) {
rasterizer->DrawIndex(); rasterizer->Draw(true);
} }
break; break;
} }
@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header); const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
// rasterizer->DrawIndex(); if (rasterizer) {
rasterizer->Draw(false);
}
break; break;
} }
case PM4ItOpcode::DispatchDirect: { case PM4ItOpcode::DispatchDirect: {

View File

@ -6,6 +6,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/types.h" #include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include <array> #include <array>
#include <condition_variable> #include <condition_variable>
@ -32,13 +33,13 @@ struct Liverpool {
static constexpr u32 NumColorBuffers = 8; static constexpr u32 NumColorBuffers = 8;
static constexpr u32 NumViewports = 16; static constexpr u32 NumViewports = 16;
static constexpr u32 NumClipPlanes = 6; static constexpr u32 NumClipPlanes = 6;
static constexpr u32 NumWordsShaderUserData = 16; static constexpr u32 NumShaderUserData = 16;
static constexpr u32 UconfigRegWordOffset = 0xC000; static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000; static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ShRegWordOffset = 0x2C00; static constexpr u32 ShRegWordOffset = 0x2C00;
static constexpr u32 NumRegs = 0xD000; static constexpr u32 NumRegs = 0xD000;
using UserData = std::array<u32, NumWordsShaderUserData>; using UserData = std::array<u32, NumShaderUserData>;
struct ShaderProgram { struct ShaderProgram {
u32 address_lo; u32 address_lo;
@ -57,6 +58,14 @@ struct Liverpool {
} }
}; };
union PsInputControl {
u32 raw;
BitField<0, 5, u32> input_offset;
BitField<5, 1, u32> use_default;
BitField<8, 2, u32> default_value;
BitField<10, 1, u32> flat_shade;
};
enum class ShaderExportComp : u32 { enum class ShaderExportComp : u32 {
None = 0, None = 0,
OneComp = 1, OneComp = 1,
@ -171,25 +180,6 @@ struct Liverpool {
BitField<31, 1, u32> disable_color_writes_on_depth_pass; BitField<31, 1, u32> disable_color_writes_on_depth_pass;
}; };
union DepthSize {
u32 raw;
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
u32 Pitch() const {
return (pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (height_tile_max + 1) << 3;
}
};
union DepthSlice {
u32 raw;
BitField<0, 22, u32> slice_tile_max;
};
enum class StencilFunc : u32 { enum class StencilFunc : u32 {
Keep = 0, Keep = 0,
Zero = 1, Zero = 1,
@ -227,9 +217,45 @@ struct Liverpool {
BitField<24, 8, u32> stencil_op_val; BitField<24, 8, u32> stencil_op_val;
}; };
union StencilInfo { struct DepthBuffer {
u32 raw; enum class ZFormat : u32 {
BitField<0, 1, u32> format; Invald = 0,
Z16 = 1,
Z32Float = 2,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
union {
BitField<0, 2, ZFormat> format;
BitField<2, 2, u32> num_samples;
BitField<13, 3, u32> tile_split;
} z_info;
union {
BitField<0, 1, StencilFormat> format;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
union {
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
} depth_size;
union {
BitField<0, 22, u32> tile_max;
} depth_slice;
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
}; };
enum class ClipSpace : u32 { enum class ClipSpace : u32 {
@ -423,39 +449,6 @@ struct Liverpool {
Swap8In64 = 3, Swap8In64 = 3,
}; };
enum class Format : u32 {
Invalid = 0,
Color_8 = 1,
Color_16 = 2,
Color_8_8 = 3,
Color_32 = 4,
Color_16_16 = 5,
Color_10_11_11 = 6,
Color_11_11_10 = 7,
Color_10_10_10_2 = 8,
Color_2_10_10_10 = 9,
Color_8_8_8_8 = 10,
Color_32_32 = 11,
Color_16_16_16_16 = 12,
Color_32_32_32_32 = 14,
Color_5_6_5 = 16,
Color_1_5_5_5 = 17,
Color_5_5_5_1 = 18,
Color_4_4_4_4 = 19,
Color_8_24 = 20,
Color_24_8 = 21,
Color_X24_8_32_FL = 22,
};
enum class NumberType : u32 {
Unorm = 0,
Snorm = 1,
Uint = 4,
Sint = 5,
Srgb = 6,
Float = 7,
};
enum class SwapMode : u32 { enum class SwapMode : u32 {
Standard = 0, Standard = 0,
Alternate = 1, Alternate = 1,
@ -482,9 +475,9 @@ struct Liverpool {
} view; } view;
union { union {
BitField<0, 2, EndianSwap> endian; BitField<0, 2, EndianSwap> endian;
BitField<2, 5, Format> format; BitField<2, 5, DataFormat> format;
BitField<7, 1, u32> linear_general; BitField<7, 1, u32> linear_general;
BitField<8, 2, NumberType> number_type; BitField<8, 2, NumberFormat> number_type;
BitField<11, 2, SwapMode> comp_swap; BitField<11, 2, SwapMode> comp_swap;
BitField<13, 1, u32> fast_clear; BitField<13, 1, u32> fast_clear;
BitField<14, 1, u32> compression; BitField<14, 1, u32> compression;
@ -529,6 +522,12 @@ struct Liverpool {
u64 CmaskAddress() const { u64 CmaskAddress() const {
return u64(cmask_base_address) << 8; return u64(cmask_base_address) << 8;
} }
NumberFormat NumFormat() const {
// There is a small difference between T# and CB number types, account for it.
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
: info.number_type;
}
}; };
enum class PrimitiveType : u32 { enum class PrimitiveType : u32 {
@ -563,14 +562,8 @@ struct Liverpool {
u32 stencil_clear; u32 stencil_clear;
u32 depth_clear; u32 depth_clear;
Scissor screen_scissor; Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2); INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
StencilInfo stencil_info; DepthBuffer depth_buffer;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
DepthSize depth_size;
DepthSlice depth_slice;
INSERT_PADDING_WORDS(0xA08E - 0xA018); INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask; ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask; ColorBufferMask color_shader_mask;
@ -584,9 +577,12 @@ struct Liverpool {
INSERT_PADDING_WORDS(1); INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NumViewports> viewports; std::array<ViewportBounds, NumViewports> viewports;
std::array<ClipUserData, NumClipPlanes> clip_user_data; std::array<ClipUserData, NumClipPlanes> clip_user_data;
INSERT_PADDING_WORDS(0xA1B1 - 0xA187); INSERT_PADDING_WORDS(0xA191 - 0xA187);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config; VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1); INSERT_PADDING_WORDS(4);
BitField<0, 6, u32> num_interp;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
ShaderPosFormat shader_pos_format; ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format; ShaderExportFormat z_export_format;
ColorExportFormat color_export_format; ColorExportFormat color_export_format;
@ -616,6 +612,17 @@ struct Liverpool {
VgtNumInstances num_instances; VgtNumInstances num_instances;
}; };
std::array<u32, NumRegs> reg_array{}; std::array<u32, NumRegs> reg_array{};
const ShaderProgram* ProgramForStage(u32 index) const {
switch (index) {
case 0:
return &vs_program;
case 4:
return &ps_program;
default:
return nullptr;
}
}
}; };
Regs regs{}; Regs regs{};
@ -656,14 +663,16 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1); static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5); static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);

View File

@ -2,11 +2,45 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <array> #include <array>
#include "common/assert.h"
#include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu { namespace AmdGpu {
u32 getNumComponents(DataFormat format) { std::string_view NameOf(NumberFormat fmt) {
switch (fmt) {
case NumberFormat::Unorm:
return "Unorm";
case NumberFormat::Snorm:
return "Snorm";
case NumberFormat::Uscaled:
return "Uscaled";
case NumberFormat::Sscaled:
return "Sscaled";
case NumberFormat::Uint:
return "Uint";
case NumberFormat::Sint:
return "Sint";
case NumberFormat::SnormNz:
return "SnormNz";
case NumberFormat::Float:
return "Float";
case NumberFormat::Srgb:
return "Srgb";
case NumberFormat::Ubnorm:
return "Ubnorm";
case NumberFormat::UbnromNz:
return "UbnormNz";
case NumberFormat::Ubint:
return "Ubint";
case NumberFormat::Ubscaled:
return "Unscaled";
default:
UNREACHABLE();
}
}
u32 NumComponents(DataFormat format) {
constexpr std::array numComponentsPerElement = { constexpr std::array numComponentsPerElement = {
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2, 0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,

View File

@ -3,6 +3,8 @@
#pragma once #pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h" #include "common/types.h"
namespace AmdGpu { namespace AmdGpu {
@ -59,6 +61,18 @@ enum class NumberFormat : u32 {
Ubscaled = 13, Ubscaled = 13,
}; };
u32 getNumComponents(DataFormat format); [[nodiscard]] std::string_view NameOf(NumberFormat fmt);
u32 NumComponents(DataFormat format);
} // namespace AmdGpu } // namespace AmdGpu
template <>
struct fmt::formatter<AmdGpu::NumberFormat> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
}
};

View File

@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
return vk::PrimitiveTopology::eTriangleListWithAdjacency; return vk::PrimitiveTopology::eTriangleListWithAdjacency;
case Liverpool::PrimitiveType::AdjTriangleStrip: case Liverpool::PrimitiveType::AdjTriangleStrip:
return vk::PrimitiveTopology::eTriangleStripWithAdjacency; return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
case Liverpool::PrimitiveType::QuadList:
// Needs to generate index buffer on the fly.
return vk::PrimitiveTopology::eTriangleList;
default: default:
UNREACHABLE(); UNREACHABLE();
return vk::PrimitiveTopology::eTriangleList; return vk::PrimitiveTopology::eTriangleList;
@ -110,4 +113,42 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
} }
} }
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32B32A32Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8B8A8Unorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb;
}
UNREACHABLE();
}
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format) {
UNREACHABLE();
}
void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
static constexpr u16 NumVerticesPerQuad = 4;
u16* out_data = reinterpret_cast<u16*>(out_ptr);
for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
*out_data++ = i;
*out_data++ = i + 1;
*out_data++ = i + 2;
*out_data++ = i + 2;
*out_data++ = i;
*out_data++ = i + 3;
}
}
} // namespace Vulkan::LiverpoolToVK } // namespace Vulkan::LiverpoolToVK

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan::LiverpoolToVK { namespace Vulkan::LiverpoolToVK {
@ -20,4 +21,11 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
vk::CullModeFlags CullMode(Liverpool::CullMode mode); vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
} // namespace Vulkan::LiverpoolToVK } // namespace Vulkan::LiverpoolToVK

View File

@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
if (!frame) { if (!frame) {
if (!splash_img.has_value()) { if (!splash_img.has_value()) {
VideoCore::ImageInfo info{}; VideoCore::ImageInfo info{};
info.pixel_format = vk::Format::eR8G8B8A8Srgb; info.pixel_format = vk::Format::eR8G8B8A8Srgb;
info.type = vk::ImageType::e2D; info.type = vk::ImageType::e2D;
@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr
} }
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
// Request a free presentation frame. // Request a free presentation frame.
Frame* frame = GetRenderFrame(); Frame* frame = GetRenderFrame();

View File

@ -4,22 +4,58 @@
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "core/memory.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan { namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_, GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, const PipelineKey& key_, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules) std::array<vk::ShaderModule, MaxShaderStages> modules)
: instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} { : instance{instance_}, scheduler{scheduler_}, key{key_} {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!infos[i]) {
continue;
}
stages[i] = *infos[i];
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = u32(buffer.stride),
.inputRate = vk::VertexInputRate::eVertex,
});
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 0U, .vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
.pVertexBindingDescriptions = nullptr, .pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = 0U, .vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
.pVertexAttributeDescriptions = nullptr, .pVertexAttributeDescriptions = attributes.data(),
}; };
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pName = "main", .pName = "main",
}; };
const vk::Format color_format = vk::Format::eB8G8R8A8Srgb; const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = 1, .colorAttachmentCount = num_color_formats,
.pColorAttachmentFormats = &color_format, .pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = vk::Format::eUndefined, .depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
.stencilAttachmentFormat = vk::Format::eUndefined, .stencilAttachmentFormat = vk::Format::eUndefined,
}; };
@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pDepthStencilState = &depth_info, .pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending, .pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info, .pDynamicState = &dynamic_info,
.layout = pipeline_layout, .layout = *pipeline_layout,
}; };
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
GraphicsPipeline::~GraphicsPipeline() = default; GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
const auto& vs_info = stages[0];
const size_t num_buffers = vs_info.vs_inputs.size();
for (u32 i = 0; i < num_buffers; ++i) {
const auto& input = vs_info.vs_inputs[i];
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
}
} // namespace Vulkan } // namespace Vulkan

View File

@ -1,19 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <xxhash.h>
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace Vulkan { namespace Vulkan {
static constexpr u32 MaxVertexBufferCount = 32;
static constexpr u32 MaxShaderStages = 5; static constexpr u32 MaxShaderStages = 5;
class Instance; class Instance;
class Scheduler;
using Liverpool = AmdGpu::Liverpool; using Liverpool = AmdGpu::Liverpool;
struct PipelineKey { struct PipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
vk::Format depth_format;
Liverpool::DepthControl depth; Liverpool::DepthControl depth;
Liverpool::StencilControl stencil; Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front; Liverpool::StencilRefMask stencil_ref_front;
@ -21,26 +33,41 @@ struct PipelineKey {
Liverpool::PrimitiveType prim_type; Liverpool::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode; Liverpool::PolygonMode polygon_mode;
Liverpool::CullMode cull_mode; Liverpool::CullMode cull_mode;
bool operator==(const PipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
}
}; };
static_assert(std::has_unique_object_representations_v<PipelineKey>); static_assert(std::has_unique_object_representations_v<PipelineKey>);
class GraphicsPipeline { class GraphicsPipeline {
public: public:
explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key, explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, const PipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules); std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline(); ~GraphicsPipeline();
void BindResources(Core::MemoryManager* memory) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept { [[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline; return *pipeline;
} }
private: private:
const Instance& instance; const Instance& instance;
Scheduler& scheduler;
vk::UniquePipeline pipeline; vk::UniquePipeline pipeline;
vk::PipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache; std::array<Shader::Info, MaxShaderStages> stages;
PipelineKey key; PipelineKey key;
}; };
} // namespace Vulkan } // namespace Vulkan
template <>
struct std::hash<Vulkan::PipelineKey> {
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
return XXH3_64bits(&key, sizeof(key));
}
};

View File

@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() {
const std::string api_version = GetReadableVersion(properties.apiVersion); const std::string api_version = GetReadableVersion(properties.apiVersion);
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", ")); const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name); LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model", model_name); LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
} }
void Instance::CollectToolingInfo() { void Instance::CollectToolingInfo() {

View File

@ -1,9 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/scope_exit.h" #include <fstream>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h" #include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -11,60 +13,123 @@
namespace Vulkan { namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
info.user_data = user_data;
info.stage = stage;
switch (stage) {
case Shader::Stage::Fragment: {
for (u32 i = 0; i < regs.num_interp; i++) {
info.ps_inputs.push_back({
.param_index = regs.ps_inputs[i].input_offset.Value(),
.is_default = bool(regs.ps_inputs[i].use_default),
.is_flat = bool(regs.ps_inputs[i].flat_shade),
.default_value = regs.ps_inputs[i].default_value,
});
}
break;
}
default:
break;
}
return info;
}
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_) AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096}, : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} { block_pool{512} {
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
} }
void PipelineCache::BindPipeline() { const GraphicsPipeline* PipelineCache::GetPipeline() {
SCOPE_EXIT { RefreshKey();
const auto cmdbuf = scheduler.CommandBuffer(); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); if (is_new) {
}; it.value() = CreatePipeline();
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
}
if (pipeline) { void PipelineCache::RefreshKey() {
return; auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth = regs.depth_control;
key.stencil = regs.stencil_control;
key.stencil_ref_front = regs.stencil_ref_front;
key.stencil_ref_back = regs.stencil_ref_back;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
: vk::Format::eUndefined;
for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) {
const auto& cb = regs.color_buffers[i];
key.color_formats[i] = cb.base_address
? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat())
: vk::Format::eUndefined;
} }
const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) { for (u32 i = 0; i < MaxShaderStages; i++) {
const u32* token = pgm.Address<u32>(); auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) {
key.stage_hashes[i] = 0;
continue;
}
const u32* code = pgm->Address<u32>();
// Retrieve shader header.
Shader::BinaryInfo bininfo; Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo)); std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
key.stage_hashes[i] = bininfo.shader_hash;
}
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
const auto& regs = liverpool->regs;
std::array<Shader::IR::Program, MaxShaderStages> programs;
std::array<const Shader::Info*, MaxShaderStages> infos{};
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!graphics_key.stage_hashes[i]) {
stages[i] = VK_NULL_HANDLE;
continue;
}
auto* pgm = regs.ProgramForStage(i);
const u32* code = pgm->Address<u32>();
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
// Lookup if the shader already exists.
const auto it = module_map.find(bininfo.shader_hash); const auto it = module_map.find(bininfo.shader_hash);
if (it != module_map.end()) { if (it != module_map.end()) {
return *it->second; stages[i] = *it->second;
continue;
} }
// Compile and cache shader. block_pool.ReleaseContents();
const auto data = std::span{token, bininfo.length / sizeof(u32)}; inst_pool.ReleaseContents();
const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data);
return CompileSPV(program, instance.GetDevice());
};
// Retrieve shader stage modules. // Recompile shader to IR.
// TODO: Only do this when program address is changed. const auto stage = Shader::Stage{i};
stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
std::move(info));
// Bind pipeline. // Compile IR to SPIR-V
// TODO: Read entire key based on reg state. const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
graphics_key.prim_type = liverpool->regs.primitive_type; stages[i] = CompileSPV(spv_code, instance.GetDevice());
graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode(); infos[i] = &programs[i].info;
pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache, }
*pipeline_layout, stages);
return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
infos, stages);
} }
} // namespace Vulkan } // namespace Vulkan

View File

@ -8,6 +8,10 @@
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Shader {
struct Info;
}
namespace Vulkan { namespace Vulkan {
class Instance; class Instance;
@ -21,7 +25,12 @@ public:
AmdGpu::Liverpool* liverpool); AmdGpu::Liverpool* liverpool);
~PipelineCache() = default; ~PipelineCache() = default;
void BindPipeline(); const GraphicsPipeline* GetPipeline();
private:
void RefreshKey();
std::unique_ptr<GraphicsPipeline> CreatePipeline();
private: private:
const Instance& instance; const Instance& instance;
@ -31,7 +40,7 @@ private:
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map; tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
std::array<vk::ShaderModule, MaxShaderStages> stages{}; std::array<vk::ShaderModule, MaxShaderStages> stages{};
std::unique_ptr<GraphicsPipeline> pipeline; tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
PipelineKey graphics_key{}; PipelineKey graphics_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool; Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool; Shader::ObjectPool<Shader::IR::Block> block_pool;

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h" #include "common/config.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -18,33 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool}, liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
if (!Config::nullGpu()) { if (!Config::nullGpu()) {
liverpool->BindRasterizer(this); liverpool->BindRasterizer(this);
} }
memory->SetInstance(&instance);
} }
Rasterizer::~Rasterizer() = default; Rasterizer::~Rasterizer() = default;
void Rasterizer::DrawIndex() { void Rasterizer::Draw(bool is_indexed) {
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
auto& regs = liverpool->regs; const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed);
static bool first_time = true; const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
if (first_time) { const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
first_time = false; pipeline->BindResources(memory);
return;
}
UpdateDynamicState();
pipeline_cache.BindPipeline();
const u32 pitch = regs.color_buffers[0].Pitch();
const u32 height = regs.color_buffers[0].Height();
const u32 tile_max = regs.color_buffers[0].slice.tile_max;
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch);
const vk::RenderingAttachmentInfo color_info = { const vk::RenderingAttachmentInfo color_info = {
.imageView = *image_view.image_view, .imageView = *image_view.image_view,
@ -61,13 +54,50 @@ void Rasterizer::DrawIndex() {
.pColorAttachments = &color_info, .pColorAttachments = &color_info,
}; };
UpdateDynamicState();
cmdbuf.beginRendering(rendering_info); cmdbuf.beginRendering(rendering_info);
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering(); cmdbuf.endRendering();
} }
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
// Emulate QuadList primitive type with CPU made index buffer.
const auto& regs = liverpool->regs;
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
is_indexed = true;
// Emit indices.
const u32 index_size = 3 * regs.num_indices;
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
vertex_index_buffer.Commit(index_size);
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
return index_size / sizeof(u16);
}
if (!is_indexed) {
return 0;
}
const VAddr index_address = regs.index_base_address.Address();
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
const vk::IndexType index_type =
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
: vk::IndexType::eUint32;
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
return regs.num_indices;
}
void Rasterizer::UpdateDynamicState() { void Rasterizer::UpdateDynamicState() {
UpdateViewportScissorState(); UpdateViewportScissorState();
} }

View File

@ -3,7 +3,6 @@
#pragma once #pragma once
#include <memory>
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h"
@ -11,6 +10,10 @@ namespace AmdGpu {
struct Liverpool; struct Liverpool;
} }
namespace Core {
class MemoryManager;
}
namespace VideoCore { namespace VideoCore {
class TextureCache; class TextureCache;
} }
@ -26,17 +29,14 @@ public:
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
~Rasterizer(); ~Rasterizer();
/// Performs a draw call with an index buffer. void Draw(bool is_indexed);
void DrawIndex();
/// Updates graphics state that is not part of the bound pipeline.
void UpdateDynamicState();
private: private:
/// Updates viewport and scissor from liverpool registers. u32 SetupIndexBuffer(bool& is_indexed);
void UpdateViewportScissorState(); void MapMemory(VAddr addr, size_t size);
/// Updates depth and stencil pipeline state from liverpool registers. void UpdateDynamicState();
void UpdateViewportScissorState();
void UpdateDepthStencilState(); void UpdateDepthStencilState();
private: private:
@ -44,6 +44,7 @@ private:
Scheduler& scheduler; Scheduler& scheduler;
VideoCore::TextureCache& texture_cache; VideoCore::TextureCache& texture_cache;
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache; PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer; StreamBuffer vertex_index_buffer;
}; };

View File

@ -35,7 +35,7 @@ public:
* @param size Size to reserve. * @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/ */
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size); void Commit(u64 size);

View File

@ -3,6 +3,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/config.h" #include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image.h"
@ -65,6 +66,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
} }
} }
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
is_tiled = true;
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
type = vk::ImageType::e2D;
size.width = buffer.Pitch();
size.height = buffer.Height();
pitch = size.width;
guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1);
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {} : device{device_}, allocator{allocator_} {}

View File

@ -6,6 +6,7 @@
#include "common/enum.h" #include "common/enum.h"
#include "common/types.h" #include "common/types.h"
#include "core/libraries/videoout/buffer.h" #include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h" #include "video_core/texture_cache/types.h"
@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct ImageInfo { struct ImageInfo {
ImageInfo() = default; ImageInfo() = default;
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept;
bool is_tiled = false; bool is_tiled = false;
vk::Format pixel_format = vk::Format::eUndefined; vk::Format pixel_format = vk::Format::eUndefined;

View File

@ -101,8 +101,8 @@ TextureCache::~TextureCache() {
} }
void TextureCache::OnCpuWrite(VAddr address) { void TextureCache::OnCpuWrite(VAddr address) {
const VAddr address_aligned = address & ~((1 << PageBits) - 1); const VAddr address_aligned = address & ~((1 << PageShift) - 1);
ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) { ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) {
// Ensure image is reuploaded when accessed again. // Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified; image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely. // Untrack image, so the range is unprotected and the guest can write freely.
@ -137,26 +137,20 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
return image; return image;
} }
ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) { ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) {
boost::container::small_vector<ImageId, 2> image_ids; const ImageInfo info{buffer};
ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) { auto& image = FindImage(info, buffer.Address());
if (image.cpu_addr == cpu_address) {
image_ids.push_back(image_id);
}
});
ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!"); ImageViewInfo view_info;
auto* image = &slot_images[image_ids.empty() ? ImageId{0} : image_ids.back()]; view_info.format = info.pixel_format;
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
ImageViewInfo info;
info.format = vk::Format::eB8G8R8A8Srgb;
if (const ImageViewId view_id = image->FindView(info); view_id) {
return slot_image_views[view_id]; return slot_image_views[view_id];
} }
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image); const ImageViewId view_id =
image->image_view_infos.emplace_back(info); slot_image_views.insert(instance, scheduler, view_info, image.image);
image->image_view_ids.emplace_back(view_id); image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id]; return slot_image_views[view_id];
} }
@ -225,13 +219,13 @@ void TextureCache::UnregisterImage(ImageId image_id) {
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page); const auto page_it = page_table.find(page);
if (page_it == page_table.end()) { if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits); ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
return; return;
} }
auto& image_ids = page_it.value(); auto& image_ids = page_it.value();
const auto vector_it = std::ranges::find(image_ids, image_id); const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) { if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits); ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);
return; return;
} }
image_ids.erase(vector_it); image_ids.erase(vector_it);

View File

@ -37,7 +37,7 @@ public:
Image& FindImage(const ImageInfo& info, VAddr cpu_address); Image& FindImage(const ImageInfo& info, VAddr cpu_address);
/// Retrieves the render target with specified properties /// Retrieves the render target with specified properties
ImageView& RenderTarget(VAddr cpu_address, u32 pitch); ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer);
/// Reuploads image contents. /// Reuploads image contents.
void RefreshImage(Image& image); void RefreshImage(Image& image);