video_core: Bringup some basic functionality (#145)

* video_core: Remove hack in rasterizer

* The hack was to skip the first draw as the display buffer had not been created yet and the texture cache couldn't create one itself. With this patch it now can, using the color buffer parameters from registers

* shader_recompiler: Implement attribute loads/stores

* video_core: Add basic vertex, index buffer handling and pipeline caching

* externals: Make xxhash lowercase
This commit is contained in:
TheTurtle 2024-05-25 15:33:15 +03:00 committed by GitHub
parent e9f64bb76c
commit 3c90b8ac00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
50 changed files with 1030 additions and 383 deletions

6
.gitmodules vendored
View File

@ -50,12 +50,12 @@
[submodule "externals/toml11"]
path = externals/toml11
url = https://github.com/ToruNiina/toml11.git
[submodule "externals/xxHash"]
path = externals/xxHash
url = https://github.com/Cyan4973/xxHash.git
[submodule "externals/zydis"]
path = externals/zydis
url = https://github.com/zyantific/zydis.git
[submodule "externals/sirit"]
path = externals/sirit
url = https://github.com/raphaelthegreat/sirit
[submodule "externals/xxhash"]
path = externals/xxhash
url = https://github.com/Cyan4973/xxHash.git

View File

@ -324,6 +324,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/frontend/control_flow_graph.h
src/shader_recompiler/frontend/decode.cpp
src/shader_recompiler/frontend/decode.h
src/shader_recompiler/frontend/fetch_shader.cpp
src/shader_recompiler/frontend/fetch_shader.h
src/shader_recompiler/frontend/format.cpp
src/shader_recompiler/frontend/instruction.cpp
src/shader_recompiler/frontend/instruction.h
@ -333,6 +335,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/constant_propogation_pass.cpp
src/shader_recompiler/ir/passes/info_collection.cpp
src/shader_recompiler/ir/passes/passes.h
src/shader_recompiler/ir/abstract_syntax_list.h
src/shader_recompiler/ir/attribute.cpp

View File

@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL)
add_subdirectory(toml11 EXCLUDE_FROM_ALL)
# xxHash
add_library(xxhash INTERFACE)
target_include_directories(xxhash INTERFACE xxhash)
add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c)
target_include_directories(xxhash PUBLIC xxhash)
# Zydis
option(ZYDIS_BUILD_TOOLS "" OFF)

View File

@ -7,6 +7,7 @@
#include "common/scope_exit.h"
#include "core/libraries/error_codes.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Core {
@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
new_vma.prot = prot;
new_vma.name = name;
new_vma.type = type;
if (type == VMAType::Direct) {
MapVulkanMemory(mapped_addr, size);
}
};
// When virtual addr is zero let the address space manager pick the address.
@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr,
"Attempting to unmap partially mapped range");
if (it->second.type == VMAType::Direct) {
UnmapVulkanMemory(virtual_addr, size);
}
// Mark region as free and attempt to coalesce it with neighbours.
auto& vma = it->second;
vma.type = VMAType::Free;
@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
impl.Unmap(virtual_addr, size);
}
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
auto it = mapped_memories.upper_bound(addr);
it = std::prev(it);
ASSERT(it != mapped_memories.end() && it->first <= addr);
return std::make_pair(*it->second.buffer, addr - it->first);
}
VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
auto vma_handle = FindVMA(virtual_addr);
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
return iter;
}
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
const vk::Device device = instance->GetDevice();
const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
void* host_pointer = reinterpret_cast<void*>(addr);
const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT(
vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer);
ASSERT(host_mem_props.memoryTypeBits != 0);
int mapped_memory_type = -1;
auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) {
u32 host_mem_types = host_mem_props.memoryTypeBits;
while (host_mem_types != 0) {
// Try to find a cached memory type
mapped_memory_type = std::countr_zero(host_mem_types);
host_mem_types -= (1 << mapped_memory_type);
if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) {
return;
}
}
mapped_memory_type = -1;
};
// First try to find a memory that is both coherent and cached
find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent |
vk::MemoryPropertyFlagBits::eHostCached);
if (mapped_memory_type == -1)
// Then only coherent (lower performance)
find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent);
if (mapped_memory_type == -1) {
LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping");
mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits);
}
const vk::StructureChain alloc_info = {
vk::MemoryAllocateInfo{
.allocationSize = size,
.memoryTypeIndex = static_cast<uint32_t>(mapped_memory_type),
},
vk::ImportMemoryHostPointerInfoEXT{
.handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
.pHostPointer = host_pointer,
},
};
const auto [it, new_memory] = mapped_memories.try_emplace(addr);
ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory");
auto& memory = it->second;
memory.backing = device.allocateMemoryUnique(alloc_info.get());
constexpr vk::BufferUsageFlags MapFlags =
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eUniformBuffer;
const vk::StructureChain buffer_info = {
vk::BufferCreateInfo{
.size = size,
.usage = MapFlags,
.sharingMode = vk::SharingMode::eExclusive,
},
vk::ExternalMemoryBufferCreateInfoKHR{
.handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
}};
memory.buffer = device.createBufferUnique(buffer_info.get());
device.bindBufferMemory(*memory.buffer, *memory.backing, 0);
}
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
const auto it = mapped_memories.find(addr);
ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
mapped_memories.erase(it);
}
} // namespace Core

View File

@ -3,6 +3,7 @@
#pragma once
#include <functional>
#include <string_view>
#include <vector>
#include <boost/icl/split_interval_map.hpp>
@ -10,6 +11,11 @@
#include "common/singleton.h"
#include "common/types.h"
#include "core/address_space.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
}
namespace Core {
@ -86,6 +92,10 @@ public:
explicit MemoryManager();
~MemoryManager();
void SetInstance(const Vulkan::Instance* instance_) {
instance = instance_;
}
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type);
@ -97,11 +107,9 @@ public:
void UnmapMemory(VAddr virtual_addr, size_t size);
private:
bool HasOverlap(VAddr addr, size_t size) const {
return vma_map.find(addr) != vma_map.end();
}
std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
private:
VMAHandle FindVMA(VAddr target) {
// Return first the VMA with base >= target.
const auto it = vma_map.lower_bound(target);
@ -117,10 +125,22 @@ private:
VMAHandle MergeAdjacent(VMAHandle iter);
void MapVulkanMemory(VAddr addr, size_t size);
void UnmapVulkanMemory(VAddr addr, size_t size);
private:
AddressSpace impl;
std::vector<DirectMemoryArea> allocations;
VMAMap vma_map;
struct MappedMemory {
vk::UniqueBuffer buffer;
vk::UniqueDeviceMemory backing;
size_t buffer_size;
};
std::map<VAddr, MappedMemory> mapped_memories;
const Vulkan::Instance* instance{};
};
using Memory = Common::Singleton<MemoryManager>;

View File

@ -20,7 +20,6 @@
#include "core/libraries/libs.h"
#include "core/libraries/videoout/video_out.h"
#include "core/linker.h"
#include "core/tls.h"
#include "input/controller.h"
#include "sdl_window.h"

View File

@ -171,7 +171,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
switch (program.stage) {
switch (program.info.stage) {
case Stage::Compute: {
// const std::array<u32, 3> workgroup_size{program.workgroup_size};
// execution_model = spv::ExecutionModel::GLCompute;
@ -194,7 +194,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
// }
break;
default:
throw NotImplementedException("Stage {}", u32(program.stage));
throw NotImplementedException("Stage {}", u32(program.info.stage));
}
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
@ -222,7 +222,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Binding
EmitContext ctx{profile, program, bindings};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
if (program.stage == Stage::Vertex) {
if (program.info.stage == Stage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}

View File

@ -10,12 +10,11 @@ namespace {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index).at(element)};
const auto& info{ctx.output_params.at(index)};
if (info.num_components == 1) {
return info.id;
} else {
const u32 index_element{element - info.first_element};
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element));
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element));
}
}
switch (attr) {
@ -68,22 +67,21 @@ Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR::
throw LogicError("Unreachable instruction");
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const u32 element{static_cast<u32>(attr) % 4};
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(element == 3 ? 1.0f : 0.0f);
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))};
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
}
throw NotImplementedException("Read attribute {}", attr);
}
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
switch (attr) {
case IR::Attribute::VertexId:
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
@ -93,9 +91,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Param0) {
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, value);
}

View File

@ -46,9 +46,9 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu
const IR::Value& offset);
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetSampleMask(EmitContext& ctx, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value);

View File

@ -36,7 +36,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} {
u32& uniform_binding{bindings.unified};
u32& storage_binding{bindings.unified};
u32& texture_binding{bindings.unified};
@ -98,6 +98,10 @@ void EmitContext::DefineArithmeticTypes() {
u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f);
input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32");
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
}
@ -107,26 +111,123 @@ void EmitContext::DefineInterfaces(const IR::Program& program) {
DefineOutputs(program);
}
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
case AmdGpu::NumberFormat::Uint:
return ctx.U32[4];
case AmdGpu::NumberFormat::Sscaled:
return ctx.F32[4];
case AmdGpu::NumberFormat::Uscaled:
return ctx.F32[4];
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uint:
return {id, input_u32, U32[1], 4};
case AmdGpu::NumberFormat::Sint:
return {id, input_s32, S32[1], 4};
case AmdGpu::NumberFormat::Sscaled:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uscaled:
return {id, input_f32, F32[1], 4};
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) {
case 0:
return ctx.ConstF32(0.f, 0.f, 0.f, 0.f);
case 1:
return ctx.ConstF32(0.f, 0.f, 0.f, 1.f);
case 2:
return ctx.ConstF32(1.f, 1.f, 1.f, 0.f);
case 3:
return ctx.ConstF32(1.f, 1.f, 1.f, 1.f);
default:
UNREACHABLE();
}
}
void EmitContext::DefineInputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) {
case Stage::Vertex:
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
}
break;
case Stage::Fragment:
for (const auto& input : info.ps_inputs) {
if (input.is_default) {
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value),
input_f32, F32[1]};
continue;
}
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
const u32 num_components = info.loads.NumComponents(param);
const Id type{F32[num_components]};
const Id id{DefineInput(type, input.semantic)};
if (input.is_flat) {
Decorate(id, spv::Decoration::Flat);
}
Name(id, fmt::format("fs_in_attr{}", input.semantic));
input_params[input.semantic] = {id, input_f32, F32[1], num_components};
interfaces.push_back(id);
}
default:
break;
}
}
void EmitContext::DefineOutputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) {
case Stage::Vertex:
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], num_components};
interfaces.push_back(id);
}
break;
case Stage::Fragment:
frag_color[0] = DefineOutput(F32[4], 0);
Name(frag_color[0], fmt::format("frag_color{}", 0));
interfaces.push_back(frag_color[0]);
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
continue;
}
frag_color[i] = DefineOutput(F32[4], i);
Name(frag_color[i], fmt::format("frag_color{}", i));
interfaces.push_back(frag_color[i]);
}
break;
default:
break;

View File

@ -135,6 +135,9 @@ public:
Id u32_zero_value{};
Id f32_zero_value{};
Id input_u32{};
Id input_f32{};
Id input_s32{};
Id output_u32{};
Id output_f32{};
@ -145,25 +148,22 @@ public:
Id base_vertex{};
std::array<Id, 8> frag_color{};
struct InputParamInfo {
struct SpirvAttribute {
Id id;
Id pointer_type;
Id component_type;
u32 num_components;
};
std::array<InputParamInfo, 32> input_params{};
struct ParamElementInfo {
Id id{};
u32 first_element{};
u32 num_components{};
};
std::array<std::array<ParamElementInfo, 4>, 32> output_params{};
std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};
private:
void DefineArithmeticTypes();
void DefineInterfaces(const IR::Program& program);
void DefineInputs(const IR::Program& program);
void DefineOutputs(const IR::Program& program);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
};
} // namespace Shader::Backend::SPIRV

View File

@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/static_vector.hpp>
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h"
namespace Shader::Gcn {
/**
* s_load_dwordx4 s[8:11], s[2:3], 0x00
* s_load_dwordx4 s[12:15], s[2:3], 0x04
* s_load_dwordx4 s[16:19], s[2:3], 0x08
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen
* buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen
* buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen
* s_waitcnt 0
* s_setpc_b64 s[0:1]
* s_load_dwordx4 s[4:7], s[2:3], 0x0
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen
* s_load_dwordx4 s[4:7], s[2:3], 0x8
* s_waitcnt lgkmcnt(0)
* buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen
* s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
* s_setpc_b64 s[0:1]
* A normal fetch shader looks like the above, the instructions are generated
* using input semantics on cpu side. Load instructions can either be separate or interleaved
* We take the reverse way, extract the original input semantics from these instructions.
**/
std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
struct VsharpLoad {
u32 dword_offset{};
s32 base_sgpr{};
s32 dst_reg{-1};
};
boost::container::static_vector<VsharpLoad, 16> loads;
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice);
if (inst.opcode == Opcode::S_SETPC_B64) {
break;
}
if (inst.inst_class == InstClass::ScalarMemRd) {
loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code);
continue;
}
if (inst.inst_class == InstClass::VectorMemBufFmt) {
// SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs
const u32 base_sgpr = inst.src[2].code * 4;
// Find the load instruction that loaded the V# to the SPGR.
// This is so we can determine its index in the vertex table.
const auto it = std::ranges::find_if(
loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
auto& attrib = attributes.emplace_back();
attrib.semantic = semantic_index++;
attrib.dest_vgpr = inst.src[1].code;
attrib.num_elements = inst.control.mubuf.count;
attrib.sgpr_base = it->base_sgpr;
attrib.dword_offset = it->dword_offset;
// Mark load as used.
it->dst_reg = -1;
}
}
return attributes;
}
} // namespace Shader::Gcn

View File

@ -0,0 +1,21 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/types.h"
namespace Shader::Gcn {
struct VertexAttribute {
u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
};
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
} // namespace Shader::Gcn

View File

@ -600,9 +600,9 @@ public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Stage stage_)
Info& info_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} {
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
Visit(root_stmt, nullptr, nullptr);
IR::Block& first_block{*syntax_list.front().data.block};
@ -633,8 +633,7 @@ private:
ensure_block();
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stage, inst_list.subspan(start, size));
fmt::print("{}\n", IR::DumpBlock(*current_block));
Translate(current_block, inst_list.subspan(start, size), info);
break;
}
case StatementType::SetVariable: {
@ -812,17 +811,17 @@ private:
IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{};
std::span<const GcnInst> inst_list;
Stage stage;
Info& info;
};
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Stage stage) {
CFG& cfg, Info& info) {
ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage};
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
return syntax_list;
}

View File

@ -10,13 +10,13 @@
#include "shader_recompiler/object_pool.h"
namespace Shader {
enum class Stage : u32;
struct Info;
}
namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg,
Stage stage);
Info& info);
} // namespace Shader::Gcn

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Gcn {
Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} {
Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} {
IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (stage) {
switch (info.stage) {
case Stage::Vertex:
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
@ -92,11 +94,39 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
}
}
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list) {
void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code;
const u32* code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
for (u32 i = 0; i < attrib.num_elements; i++) {
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
}
// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
});
}
}
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) {
return;
}
Translator translator{block, stage};
Translator translator{block, info};
for (const auto& inst : inst_list) {
switch (inst.opcode) {
case Opcode::S_MOV_B32:
@ -115,6 +145,9 @@ void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list
translator.V_MUL_F32(inst);
break;
case Opcode::S_SWAPPC_B64:
ASSERT(info.stage == Stage::Vertex);
translator.EmitFetch(inst);
break;
case Opcode::S_WAITCNT:
break; // Ignore for now.
case Opcode::S_BUFFER_LOAD_DWORDX16:

View File

@ -7,9 +7,10 @@
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader {
enum class Stage : u32;
struct Info;
}
namespace Shader::Gcn {
@ -25,7 +26,9 @@ enum class ConditionOp : u32 {
class Translator {
public:
explicit Translator(IR::Block* block_, Stage stage);
explicit Translator(IR::Block* block_, Info& info);
void EmitFetch(const GcnInst& inst);
// Scalar ALU
void S_MOV(const GcnInst& inst);
@ -66,8 +69,9 @@ private:
private:
IR::Block* block;
IR::IREmitter ir;
Info& info;
};
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list);
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
} // namespace Shader::Gcn

View File

@ -20,9 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) {
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])),
ir.FPConvert(16, GetSrc(inst.src[1])));
ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32));
const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
}
void Translator::V_MUL_F32(const GcnInst& inst) {

View File

@ -7,7 +7,9 @@ namespace Shader::Gcn {
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr};
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
attr.semantic = inst.control.vintrp.attr;
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
}

View File

@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) {
return "Param31";
case Attribute::VertexId:
return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::FragCoord:
return "FragCoord";
default:
break;
}

View File

@ -72,10 +72,12 @@ enum class Attribute : u64 {
LocalInvocationId = 75,
LocalInvocationIndex = 76,
FragCoord = 77,
Max,
};
constexpr size_t EXP_NUM_POS = 4;
constexpr size_t EXP_NUM_PARAM = 32;
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32;
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
@ -86,7 +88,7 @@ constexpr size_t EXP_NUM_PARAM = 32;
if (result > static_cast<int>(Attribute::Param31)) {
throw LogicError("Overflow on register arithmetic");
}
if (result < static_cast<int>(Attribute::Param0)) {
if (result < static_cast<int>(Attribute::RenderTarget0)) {
throw LogicError("Underflow on register arithmetic");
}
return static_cast<Attribute>(result);

View File

@ -174,18 +174,10 @@ void IREmitter::SetVcc(const U1& value) {
Inst(Opcode::SetVcc, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute) {
return GetAttribute(attribute, 0);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
return GetAttributeU32(attribute, 0);
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
return Inst<U32>(Opcode::GetAttributeU32, attribute, Imm32(comp));
}

View File

@ -58,11 +58,9 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp);
void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0);
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);

View File

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32: {
info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
break;
}
case IR::Opcode::SetAttribute: {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
default:
break;
}
}
void CollectShaderInfoPass(IR::Program& program) {
Info& info{program.info};
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
Visit(info, inst);
}
}
}
} // namespace Shader::Optimization

View File

@ -4,6 +4,7 @@
#pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
@ -11,6 +12,7 @@ void SsaRewritePass(IR::BlockList& program);
void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::BlockList& program);
void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::BlockList& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
} // namespace Shader::Optimization

View File

@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) {
};
}
void ResourceTrackingPass(IR::BlockList& program) {
for (IR::Block* const block : program) {
void ResourceTrackingPass(IR::Program& program) {
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (!IsResourceInstruction(inst)) {
continue;
}
printf("ff\n");
IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto loc = TrackSharp(producer->Arg(0));
fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,

View File

@ -3,15 +3,11 @@
#pragma once
#include <array>
#include <string>
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h"
namespace Shader {
enum class Stage : u32;
}
#include "shader_recompiler/runtime_info.h"
namespace Shader::IR {
@ -20,7 +16,7 @@ struct Program {
BlockList blocks;
BlockList post_order_blocks;
std::vector<Gcn::GcnInst> ins_list;
Stage stage;
Info info;
};
[[nodiscard]] std::string DumpProgram(const Program& program);

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
@ -30,9 +29,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks;
}
std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32> token) {
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> token, const Info&& info) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -40,6 +38,11 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
Gcn::GcnDecodeContext decoder;
static int counter = 0;
std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary);
file.write((const char*)token.data(), token.size_bytes());
file.close();
// Decode and save instructions
IR::Program program;
program.ins_list.reserve(token.size());
@ -52,21 +55,24 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage);
program.info = std::move(info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
program.stage = stage;
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks);
// Shader::Optimization::ResourceTrackingPass(program.post_order_blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
Shader::Optimization::CollectShaderInfoPass(program);
// TODO: Pass profile from vulkan backend
const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program);
return code;
for (const auto& block : program.blocks) {
fmt::print("{}\n", IR::DumpBlock(*block));
}
return program;
}
} // namespace Shader

View File

@ -3,7 +3,9 @@
#pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader {
@ -26,8 +28,8 @@ struct BinaryInfo {
u32 crc32;
};
[[nodiscard]] std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32> code);
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, const Info&& info);
} // namespace Shader

View File

@ -3,39 +3,16 @@
#pragma once
#include <array>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/type.h"
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/ir/attribute.h"
#include "video_core/amdgpu/pixel_format.h"
namespace Shader {
enum class AttributeType : u8 {
Float,
SignedInt,
UnsignedInt,
SignedScaled,
UnsignedScaled,
Disabled,
};
enum class InputTopology {
Points,
Lines,
LinesAdjacency,
Triangles,
TrianglesAdjacency,
};
enum class CompareFunction {
Never,
Less,
Equal,
LessThanEqual,
Greater,
NotEqual,
GreaterThanEqual,
Always,
};
static constexpr size_t NumUserDataRegs = 16;
enum class Stage : u32 {
Vertex,
@ -62,78 +39,64 @@ enum class TextureType : u32 {
};
constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class Interpolation {
Smooth,
Flat,
NoPerspective,
};
struct ConstantBufferDescriptor {
u32 index;
u32 count;
auto operator<=>(const ConstantBufferDescriptor&) const = default;
};
struct TextureDescriptor {
TextureType type;
bool is_eud;
bool is_depth;
bool is_multisample;
bool is_storage;
u32 count;
u32 eud_offset_dwords;
u32 ud_index_dwords;
auto operator<=>(const TextureDescriptor&) const = default;
};
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
struct Info {
bool uses_workgroup_id{};
bool uses_local_invocation_id{};
bool uses_invocation_id{};
bool uses_invocation_info{};
bool uses_sample_id{};
struct VsInput {
AmdGpu::NumberFormat fmt;
u16 binding;
u16 num_components;
u8 sgpr_base;
u8 dword_offset;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};
std::array<Interpolation, 32> interpolation{};
// VaryingState loads;
// VaryingState stores;
// VaryingState passthrough;
struct PsInput {
u32 param_index;
u32 semantic;
bool is_default;
bool is_flat;
u32 default_value;
};
boost::container::static_vector<PsInput, 32> ps_inputs{};
std::array<bool, 8> stores_frag_color{};
bool stores_sample_mask{};
bool stores_frag_depth{};
struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[Index(attrib)] & (1 << comp);
}
bool uses_fp16{};
bool uses_fp64{};
bool uses_fp16_denorms_flush{};
bool uses_fp16_denorms_preserve{};
bool uses_fp32_denorms_flush{};
bool uses_fp32_denorms_preserve{};
bool uses_int8{};
bool uses_int16{};
bool uses_int64{};
bool uses_image_1d{};
bool uses_sampled_1d{};
bool uses_subgroup_vote{};
bool uses_subgroup_mask{};
bool uses_derivatives{};
bool GetAny(IR::Attribute attrib) const {
return flags[Index(attrib)];
}
IR::Type used_constant_buffer_types{};
IR::Type used_storage_buffer_types{};
IR::Type used_indirect_cbuf_types{};
void Set(IR::Attribute attrib, u32 comp = 0) {
flags[Index(attrib)] |= (1 << comp);
}
// std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
u32 used_clip_distances{};
u32 NumComponents(IR::Attribute attrib) const {
const u8 mask = flags[Index(attrib)];
ASSERT(mask != 0b1011 || mask != 0b1101);
return std::popcount(mask);
}
// boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
// constant_buffer_descriptors;
// boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS>
// storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors;
// ImageBufferDescriptors image_buffer_descriptors;
// TextureDescriptors texture_descriptors;
// ImageDescriptors image_descriptors;
static size_t Index(IR::Attribute attrib) {
return static_cast<size_t>(attrib);
}
std::array<u8, IR::NumAttributes> flags;
};
AttributeFlags loads{};
AttributeFlags stores{};
std::span<const u32> user_data;
Stage stage;
template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
T data;
u32* base;
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
std::memcpy(&data, base + dword_offset, sizeof(T));
return data;
}
};
} // namespace Shader

View File

@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
rasterizer->DrawIndex();
rasterizer->Draw(true);
}
break;
}
@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
// rasterizer->DrawIndex();
if (rasterizer) {
rasterizer->Draw(false);
}
break;
}
case PM4ItOpcode::DispatchDirect: {

View File

@ -6,6 +6,7 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include <array>
#include <condition_variable>
@ -32,13 +33,13 @@ struct Liverpool {
static constexpr u32 NumColorBuffers = 8;
static constexpr u32 NumViewports = 16;
static constexpr u32 NumClipPlanes = 6;
static constexpr u32 NumWordsShaderUserData = 16;
static constexpr u32 NumShaderUserData = 16;
static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ShRegWordOffset = 0x2C00;
static constexpr u32 NumRegs = 0xD000;
using UserData = std::array<u32, NumWordsShaderUserData>;
using UserData = std::array<u32, NumShaderUserData>;
struct ShaderProgram {
u32 address_lo;
@ -57,6 +58,14 @@ struct Liverpool {
}
};
union PsInputControl {
u32 raw;
BitField<0, 5, u32> input_offset;
BitField<5, 1, u32> use_default;
BitField<8, 2, u32> default_value;
BitField<10, 1, u32> flat_shade;
};
enum class ShaderExportComp : u32 {
None = 0,
OneComp = 1,
@ -171,25 +180,6 @@ struct Liverpool {
BitField<31, 1, u32> disable_color_writes_on_depth_pass;
};
union DepthSize {
u32 raw;
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
u32 Pitch() const {
return (pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (height_tile_max + 1) << 3;
}
};
union DepthSlice {
u32 raw;
BitField<0, 22, u32> slice_tile_max;
};
enum class StencilFunc : u32 {
Keep = 0,
Zero = 1,
@ -227,9 +217,45 @@ struct Liverpool {
BitField<24, 8, u32> stencil_op_val;
};
union StencilInfo {
u32 raw;
BitField<0, 1, u32> format;
struct DepthBuffer {
enum class ZFormat : u32 {
Invald = 0,
Z16 = 1,
Z32Float = 2,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
union {
BitField<0, 2, ZFormat> format;
BitField<2, 2, u32> num_samples;
BitField<13, 3, u32> tile_split;
} z_info;
union {
BitField<0, 1, StencilFormat> format;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
union {
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
} depth_size;
union {
BitField<0, 22, u32> tile_max;
} depth_slice;
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
};
enum class ClipSpace : u32 {
@ -423,39 +449,6 @@ struct Liverpool {
Swap8In64 = 3,
};
enum class Format : u32 {
Invalid = 0,
Color_8 = 1,
Color_16 = 2,
Color_8_8 = 3,
Color_32 = 4,
Color_16_16 = 5,
Color_10_11_11 = 6,
Color_11_11_10 = 7,
Color_10_10_10_2 = 8,
Color_2_10_10_10 = 9,
Color_8_8_8_8 = 10,
Color_32_32 = 11,
Color_16_16_16_16 = 12,
Color_32_32_32_32 = 14,
Color_5_6_5 = 16,
Color_1_5_5_5 = 17,
Color_5_5_5_1 = 18,
Color_4_4_4_4 = 19,
Color_8_24 = 20,
Color_24_8 = 21,
Color_X24_8_32_FL = 22,
};
enum class NumberType : u32 {
Unorm = 0,
Snorm = 1,
Uint = 4,
Sint = 5,
Srgb = 6,
Float = 7,
};
enum class SwapMode : u32 {
Standard = 0,
Alternate = 1,
@ -482,9 +475,9 @@ struct Liverpool {
} view;
union {
BitField<0, 2, EndianSwap> endian;
BitField<2, 5, Format> format;
BitField<2, 5, DataFormat> format;
BitField<7, 1, u32> linear_general;
BitField<8, 2, NumberType> number_type;
BitField<8, 2, NumberFormat> number_type;
BitField<11, 2, SwapMode> comp_swap;
BitField<13, 1, u32> fast_clear;
BitField<14, 1, u32> compression;
@ -529,6 +522,12 @@ struct Liverpool {
u64 CmaskAddress() const {
return u64(cmask_base_address) << 8;
}
NumberFormat NumFormat() const {
// There is a small difference between T# and CB number types, account for it.
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
: info.number_type;
}
};
enum class PrimitiveType : u32 {
@ -563,14 +562,8 @@ struct Liverpool {
u32 stencil_clear;
u32 depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2);
StencilInfo stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
DepthSize depth_size;
DepthSlice depth_slice;
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
DepthBuffer depth_buffer;
INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask;
@ -584,9 +577,12 @@ struct Liverpool {
INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NumViewports> viewports;
std::array<ClipUserData, NumClipPlanes> clip_user_data;
INSERT_PADDING_WORDS(0xA1B1 - 0xA187);
INSERT_PADDING_WORDS(0xA191 - 0xA187);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1);
INSERT_PADDING_WORDS(4);
BitField<0, 6, u32> num_interp;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format;
ColorExportFormat color_export_format;
@ -616,6 +612,17 @@ struct Liverpool {
VgtNumInstances num_instances;
};
std::array<u32, NumRegs> reg_array{};
const ShaderProgram* ProgramForStage(u32 index) const {
switch (index) {
case 0:
return &vs_program;
case 4:
return &ps_program;
default:
return nullptr;
}
}
};
Regs regs{};
@ -656,14 +663,16 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);

View File

@ -2,11 +2,45 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include "common/assert.h"
#include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu {
u32 getNumComponents(DataFormat format) {
std::string_view NameOf(NumberFormat fmt) {
switch (fmt) {
case NumberFormat::Unorm:
return "Unorm";
case NumberFormat::Snorm:
return "Snorm";
case NumberFormat::Uscaled:
return "Uscaled";
case NumberFormat::Sscaled:
return "Sscaled";
case NumberFormat::Uint:
return "Uint";
case NumberFormat::Sint:
return "Sint";
case NumberFormat::SnormNz:
return "SnormNz";
case NumberFormat::Float:
return "Float";
case NumberFormat::Srgb:
return "Srgb";
case NumberFormat::Ubnorm:
return "Ubnorm";
case NumberFormat::UbnromNz:
return "UbnormNz";
case NumberFormat::Ubint:
return "Ubint";
case NumberFormat::Ubscaled:
return "Unscaled";
default:
UNREACHABLE();
}
}
u32 NumComponents(DataFormat format) {
constexpr std::array numComponentsPerElement = {
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,

View File

@ -3,6 +3,8 @@
#pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h"
namespace AmdGpu {
@ -59,6 +61,18 @@ enum class NumberFormat : u32 {
Ubscaled = 13,
};
u32 getNumComponents(DataFormat format);
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
u32 NumComponents(DataFormat format);
} // namespace AmdGpu
template <>
struct fmt::formatter<AmdGpu::NumberFormat> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
}
};

View File

@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
return vk::PrimitiveTopology::eTriangleListWithAdjacency;
case Liverpool::PrimitiveType::AdjTriangleStrip:
return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
case Liverpool::PrimitiveType::QuadList:
// Needs to generate index buffer on the fly.
return vk::PrimitiveTopology::eTriangleList;
default:
UNREACHABLE();
return vk::PrimitiveTopology::eTriangleList;
@ -110,4 +113,42 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
}
}
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32B32A32Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8B8A8Unorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb;
}
UNREACHABLE();
}
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format) {
UNREACHABLE();
}
void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
static constexpr u16 NumVerticesPerQuad = 4;
u16* out_data = reinterpret_cast<u16*>(out_ptr);
for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
*out_data++ = i;
*out_data++ = i + 1;
*out_data++ = i + 2;
*out_data++ = i + 2;
*out_data++ = i;
*out_data++ = i + 3;
}
}
} // namespace Vulkan::LiverpoolToVK

View File

@ -4,6 +4,7 @@
#pragma once
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan::LiverpoolToVK {
@ -20,4 +21,11 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
} // namespace Vulkan::LiverpoolToVK

View File

@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
if (!frame) {
if (!splash_img.has_value()) {
VideoCore::ImageInfo info{};
info.pixel_format = vk::Format::eR8G8B8A8Srgb;
info.type = vk::ImageType::e2D;
@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr
}
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
// Request a free presentation frame.
Frame* frame = GetRenderFrame();

View File

@ -4,22 +4,58 @@
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_,
vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_,
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
const PipelineKey& key_, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules)
: instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} {
: instance{instance_}, scheduler{scheduler_}, key{key_} {
const vk::Device device = instance.GetDevice();
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!infos[i]) {
continue;
}
stages[i] = *infos[i];
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = u32(buffer.stride),
.inputRate = vk::VertexInputRate::eVertex,
});
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 0U,
.pVertexBindingDescriptions = nullptr,
.vertexAttributeDescriptionCount = 0U,
.pVertexAttributeDescriptions = nullptr,
.vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
.pVertexAttributeDescriptions = attributes.data(),
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pName = "main",
};
const vk::Format color_format = vk::Format::eB8G8R8A8Srgb;
const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = 1,
.pColorAttachmentFormats = &color_format,
.depthAttachmentFormat = vk::Format::eUndefined,
.colorAttachmentCount = num_color_formats,
.pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
.stencilAttachmentFormat = vk::Format::eUndefined,
};
@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = pipeline_layout,
.layout = *pipeline_layout,
};
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
const auto& vs_info = stages[0];
const size_t num_buffers = vs_info.vs_inputs.size();
for (u32 i = 0; i < num_buffers; ++i) {
const auto& input = vs_info.vs_inputs[i];
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
}
} // namespace Vulkan

View File

@ -1,19 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <xxhash.h>
#include "common/types.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace Vulkan {
static constexpr u32 MaxVertexBufferCount = 32;
static constexpr u32 MaxShaderStages = 5;
class Instance;
class Scheduler;
using Liverpool = AmdGpu::Liverpool;
struct PipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
vk::Format depth_format;
Liverpool::DepthControl depth;
Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front;
@ -21,26 +33,41 @@ struct PipelineKey {
Liverpool::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode;
Liverpool::CullMode cull_mode;
bool operator==(const PipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
}
};
static_assert(std::has_unique_object_representations_v<PipelineKey>);
class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key,
vk::PipelineCache pipeline_cache, vk::PipelineLayout layout,
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
const PipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline();
void BindResources(Core::MemoryManager* memory) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline;
}
private:
const Instance& instance;
Scheduler& scheduler;
vk::UniquePipeline pipeline;
vk::PipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache;
vk::UniquePipelineLayout pipeline_layout;
std::array<Shader::Info, MaxShaderStages> stages;
PipelineKey key;
};
} // namespace Vulkan
template <>
struct std::hash<Vulkan::PipelineKey> {
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
return XXH3_64bits(&key, sizeof(key));
}
};

View File

@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() {
const std::string api_version = GetReadableVersion(properties.apiVersion);
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions);
LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
}
void Instance::CollectToolingInfo() {

View File

@ -1,9 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/scope_exit.h"
#include <fstream>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -11,60 +13,123 @@
namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
info.user_data = user_data;
info.stage = stage;
switch (stage) {
case Shader::Stage::Fragment: {
for (u32 i = 0; i < regs.num_interp; i++) {
info.ps_inputs.push_back({
.param_index = regs.ps_inputs[i].input_offset.Value(),
.is_default = bool(regs.ps_inputs[i].use_default),
.is_flat = bool(regs.ps_inputs[i].flat_shade),
.default_value = regs.ps_inputs[i].default_value,
});
}
break;
}
default:
break;
}
return info;
}
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096},
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} {
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
}
void PipelineCache::BindPipeline() {
SCOPE_EXIT {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
};
const GraphicsPipeline* PipelineCache::GetPipeline() {
RefreshKey();
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) {
it.value() = CreatePipeline();
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
}
if (pipeline) {
return;
void PipelineCache::RefreshKey() {
auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth = regs.depth_control;
key.stencil = regs.stencil_control;
key.stencil_ref_front = regs.stencil_ref_front;
key.stencil_ref_back = regs.stencil_ref_back;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
: vk::Format::eUndefined;
for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) {
const auto& cb = regs.color_buffers[i];
key.color_formats[i] = cb.base_address
? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat())
: vk::Format::eUndefined;
}
const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) {
const u32* token = pgm.Address<u32>();
for (u32 i = 0; i < MaxShaderStages; i++) {
auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) {
key.stage_hashes[i] = 0;
continue;
}
const u32* code = pgm->Address<u32>();
// Retrieve shader header.
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo));
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
key.stage_hashes[i] = bininfo.shader_hash;
}
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
const auto& regs = liverpool->regs;
std::array<Shader::IR::Program, MaxShaderStages> programs;
std::array<const Shader::Info*, MaxShaderStages> infos{};
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!graphics_key.stage_hashes[i]) {
stages[i] = VK_NULL_HANDLE;
continue;
}
auto* pgm = regs.ProgramForStage(i);
const u32* code = pgm->Address<u32>();
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
// Lookup if the shader already exists.
const auto it = module_map.find(bininfo.shader_hash);
if (it != module_map.end()) {
return *it->second;
stages[i] = *it->second;
continue;
}
// Compile and cache shader.
const auto data = std::span{token, bininfo.length / sizeof(u32)};
const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data);
return CompileSPV(program, instance.GetDevice());
};
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
// Retrieve shader stage modules.
// TODO: Only do this when program address is changed.
stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex);
stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment);
// Recompile shader to IR.
const auto stage = Shader::Stage{i};
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
std::move(info));
// Bind pipeline.
// TODO: Read entire key based on reg state.
graphics_key.prim_type = liverpool->regs.primitive_type;
graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode();
pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache,
*pipeline_layout, stages);
// Compile IR to SPIR-V
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
stages[i] = CompileSPV(spv_code, instance.GetDevice());
infos[i] = &programs[i].info;
}
return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
infos, stages);
}
} // namespace Vulkan

View File

@ -8,6 +8,10 @@
#include "shader_recompiler/object_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Shader {
struct Info;
}
namespace Vulkan {
class Instance;
@ -21,7 +25,12 @@ public:
AmdGpu::Liverpool* liverpool);
~PipelineCache() = default;
void BindPipeline();
const GraphicsPipeline* GetPipeline();
private:
void RefreshKey();
std::unique_ptr<GraphicsPipeline> CreatePipeline();
private:
const Instance& instance;
@ -31,7 +40,7 @@ private:
vk::UniquePipelineLayout pipeline_layout;
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
std::array<vk::ShaderModule, MaxShaderStages> stages{};
std::unique_ptr<GraphicsPipeline> pipeline;
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
PipelineKey graphics_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool;

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -18,33 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
memory->SetInstance(&instance);
}
Rasterizer::~Rasterizer() = default;
void Rasterizer::DrawIndex() {
void Rasterizer::Draw(bool is_indexed) {
const auto cmdbuf = scheduler.CommandBuffer();
auto& regs = liverpool->regs;
static bool first_time = true;
if (first_time) {
first_time = false;
return;
}
UpdateDynamicState();
pipeline_cache.BindPipeline();
const u32 pitch = regs.color_buffers[0].Pitch();
const u32 height = regs.color_buffers[0].Height();
const u32 tile_max = regs.color_buffers[0].slice.tile_max;
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch);
const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed);
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
pipeline->BindResources(memory);
const vk::RenderingAttachmentInfo color_info = {
.imageView = *image_view.image_view,
@ -61,13 +54,50 @@ void Rasterizer::DrawIndex() {
.pColorAttachments = &color_info,
};
UpdateDynamicState();
cmdbuf.beginRendering(rendering_info);
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32);
cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0));
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering();
}
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
// Emulate QuadList primitive type with CPU made index buffer.
const auto& regs = liverpool->regs;
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
is_indexed = true;
// Emit indices.
const u32 index_size = 3 * regs.num_indices;
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
vertex_index_buffer.Commit(index_size);
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
return index_size / sizeof(u16);
}
if (!is_indexed) {
return 0;
}
const VAddr index_address = regs.index_base_address.Address();
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
const vk::IndexType index_type =
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
: vk::IndexType::eUint32;
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
return regs.num_indices;
}
void Rasterizer::UpdateDynamicState() {
UpdateViewportScissorState();
}

View File

@ -3,7 +3,6 @@
#pragma once
#include <memory>
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@ -11,6 +10,10 @@ namespace AmdGpu {
struct Liverpool;
}
namespace Core {
class MemoryManager;
}
namespace VideoCore {
class TextureCache;
}
@ -26,17 +29,14 @@ public:
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
~Rasterizer();
/// Performs a draw call with an index buffer.
void DrawIndex();
/// Updates graphics state that is not part of the bound pipeline.
void UpdateDynamicState();
void Draw(bool is_indexed);
private:
/// Updates viewport and scissor from liverpool registers.
void UpdateViewportScissorState();
u32 SetupIndexBuffer(bool& is_indexed);
void MapMemory(VAddr addr, size_t size);
/// Updates depth and stencil pipeline state from liverpool registers.
void UpdateDynamicState();
void UpdateViewportScissorState();
void UpdateDepthStencilState();
private:
@ -44,6 +44,7 @@ private:
Scheduler& scheduler;
VideoCore::TextureCache& texture_cache;
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer;
};

View File

@ -35,7 +35,7 @@ public:
* @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size);

View File

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/image.h"
@ -65,6 +66,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
}
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
is_tiled = true;
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
type = vk::ImageType::e2D;
size.width = buffer.Pitch();
size.height = buffer.Height();
pitch = size.width;
guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1);
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}

View File

@ -6,6 +6,7 @@
#include "common/enum.h"
#include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h"
@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct ImageInfo {
ImageInfo() = default;
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept;
bool is_tiled = false;
vk::Format pixel_format = vk::Format::eUndefined;

View File

@ -101,8 +101,8 @@ TextureCache::~TextureCache() {
}
void TextureCache::OnCpuWrite(VAddr address) {
const VAddr address_aligned = address & ~((1 << PageBits) - 1);
ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) {
const VAddr address_aligned = address & ~((1 << PageShift) - 1);
ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
@ -137,26 +137,20 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
return image;
}
ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) {
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) {
if (image.cpu_addr == cpu_address) {
image_ids.push_back(image_id);
}
});
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) {
const ImageInfo info{buffer};
auto& image = FindImage(info, buffer.Address());
ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!");
auto* image = &slot_images[image_ids.empty() ? ImageId{0} : image_ids.back()];
ImageViewInfo info;
info.format = vk::Format::eB8G8R8A8Srgb;
if (const ImageViewId view_id = image->FindView(info); view_id) {
ImageViewInfo view_info;
view_info.format = info.pixel_format;
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
return slot_image_views[view_id];
}
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image);
image->image_view_infos.emplace_back(info);
image->image_view_ids.emplace_back(view_id);
const ImageViewId view_id =
slot_image_views.insert(instance, scheduler, view_info, image.image);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];
}
@ -225,13 +219,13 @@ void TextureCache::UnregisterImage(ImageId image_id) {
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits);
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
return;
}
auto& image_ids = page_it.value();
const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits);
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);
return;
}
image_ids.erase(vector_it);

View File

@ -37,7 +37,7 @@ public:
Image& FindImage(const ImageInfo& info, VAddr cpu_address);
/// Retrieves the render target with specified properties
ImageView& RenderTarget(VAddr cpu_address, u32 pitch);
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer);
/// Reuploads image contents.
void RefreshImage(Image& image);