video_core: Attempt no2 for specialization
This commit is contained in:
parent
914dbdc509
commit
e96a6ebd9d
|
@ -521,6 +521,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
|||
src/video_core/renderer_vulkan/vk_resource_pool.h
|
||||
src/video_core/renderer_vulkan/vk_scheduler.cpp
|
||||
src/video_core/renderer_vulkan/vk_scheduler.h
|
||||
src/video_core/renderer_vulkan/vk_shader_cache.cpp
|
||||
src/video_core/renderer_vulkan/vk_shader_cache.h
|
||||
src/video_core/renderer_vulkan/vk_shader_util.cpp
|
||||
src/video_core/renderer_vulkan/vk_shader_util.h
|
||||
src/video_core/renderer_vulkan/vk_swapchain.cpp
|
||||
|
|
|
@ -120,7 +120,6 @@ bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle,
|
|||
}
|
||||
|
||||
SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) {
|
||||
return nullptr;
|
||||
LOG_TRACE(Lib_AvPlayer, "called");
|
||||
if (data == nullptr) {
|
||||
return nullptr;
|
||||
|
|
|
@ -1066,16 +1066,7 @@ ScePthread PThreadPool::Create() {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef _WIN64
|
||||
auto* ret = new PthreadInternal{};
|
||||
#else
|
||||
// TODO: Linux specific hack
|
||||
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
|
||||
auto* ret = reinterpret_cast<PthreadInternal*>(
|
||||
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
|
||||
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
|
||||
#endif
|
||||
ret->is_free = false;
|
||||
ret->is_detached = false;
|
||||
ret->is_almost_done = false;
|
||||
|
|
|
@ -201,6 +201,12 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
if (info.has_image_query) {
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
}
|
||||
if (info.uses_lane_id) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
}
|
||||
if (info.uses_group_quad) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
|
||||
|
@ -219,10 +225,6 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
} else {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
|
||||
}
|
||||
if (info.uses_group_quad) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
|
||||
}
|
||||
if (info.has_discard) {
|
||||
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
|
||||
}
|
||||
|
|
|
@ -132,6 +132,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
case AmdGpu::NumberFormat::SnormNz:
|
||||
case AmdGpu::NumberFormat::Sscaled:
|
||||
case AmdGpu::NumberFormat::Uscaled:
|
||||
case AmdGpu::NumberFormat::Srgb:
|
||||
return ctx.F32;
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return ctx.S32;
|
||||
|
@ -140,7 +141,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
throw InvalidArgument("Invalid attribute type {}", fmt);
|
||||
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
|
||||
|
@ -161,7 +162,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
default:
|
||||
break;
|
||||
}
|
||||
throw InvalidArgument("Invalid attribute type {}", fmt);
|
||||
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
void EmitContext::DefineBufferOffsets() {
|
||||
|
@ -204,6 +205,11 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
|||
}
|
||||
|
||||
void EmitContext::DefineInputs() {
|
||||
if (info.uses_lane_id) {
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
switch (stage) {
|
||||
case Stage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
|
@ -238,9 +244,6 @@ void EmitContext::DefineInputs() {
|
|||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
|
@ -354,12 +357,12 @@ void EmitContext::DefineBuffers() {
|
|||
};
|
||||
|
||||
for (const auto& desc : info.buffers) {
|
||||
const auto sharp = desc.GetVsharp(info);
|
||||
const auto sharp = desc.GetSharp(info);
|
||||
const bool is_storage = desc.IsStorage(sharp);
|
||||
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
const Id data_type = (*data_types)[1];
|
||||
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
|
||||
: TypeArray(data_type, ConstU32(desc.length))};
|
||||
: TypeArray(data_type, ConstU32(sharp.NumDwords()))};
|
||||
const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
|
||||
|
||||
const auto storage_class =
|
||||
|
@ -369,6 +372,9 @@ void EmitContext::DefineBuffers() {
|
|||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
if (is_storage && !desc.is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
}
|
||||
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
|
||||
|
||||
buffers.push_back({
|
||||
|
@ -503,17 +509,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
|
||||
void EmitContext::DefineImagesAndSamplers() {
|
||||
for (const auto& image_desc : info.images) {
|
||||
const VectorIds* data_types = [&] {
|
||||
switch (image_desc.nfmt) {
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return &U32;
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return &S32;
|
||||
default:
|
||||
return &F32;
|
||||
}
|
||||
}();
|
||||
const Id sampled_type = data_types->Get(1);
|
||||
const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
|
||||
const Id sampled_type = data_types[1];
|
||||
const Id image_type{ImageType(*this, image_desc, sampled_type)};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
|
||||
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
|
||||
|
@ -522,7 +519,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
|
||||
image_desc.dword_offset));
|
||||
images.push_back({
|
||||
.data_types = data_types,
|
||||
.data_types = &data_types,
|
||||
.id = id,
|
||||
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
|
||||
.pointer_type = pointer_type,
|
||||
|
@ -531,13 +528,12 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
interfaces.push_back(id);
|
||||
++binding;
|
||||
}
|
||||
|
||||
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
||||
|
||||
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
|
||||
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
||||
}
|
||||
if (info.samplers.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
sampler_type = TypeSampler();
|
||||
sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
|
||||
for (const auto& samp_desc : info.samplers) {
|
||||
|
@ -553,7 +549,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
}
|
||||
|
||||
void EmitContext::DefineSharedMemory() {
|
||||
static constexpr size_t DefaultSharedMemSize = 16_KB;
|
||||
static constexpr size_t DefaultSharedMemSize = 2_KB;
|
||||
if (!info.uses_shared) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::EmitExport(const GcnInst& inst) {
|
||||
if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
|
||||
LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation");
|
||||
ir.Discard(ir.LogicalNot(ir.GetExec()));
|
||||
}
|
||||
|
||||
|
|
|
@ -399,7 +399,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
info.buffers.push_back({
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
.dword_offset = attrib.dword_offset,
|
||||
.length = buffer.num_records,
|
||||
.used_types = IR::Type::F32,
|
||||
.is_instance_data = true,
|
||||
});
|
||||
|
|
|
@ -415,14 +415,20 @@ void Translator::V_ADDC_U32(const GcnInst& inst) {
|
|||
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
|
||||
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
|
||||
|
||||
IR::U32 scarry;
|
||||
IR::U1 carry;
|
||||
if (inst.src_count == 3) { // VOP3
|
||||
IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
|
||||
scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
|
||||
if (inst.src[2].field == OperandField::VccLo) {
|
||||
carry = ir.GetVcc();
|
||||
} else if (inst.src[2].field == OperandField::ScalarGPR) {
|
||||
carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else { // VOP2
|
||||
scarry = ir.GetVccLo();
|
||||
carry = ir.GetVcc();
|
||||
}
|
||||
|
||||
const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
|
||||
const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
|
||||
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "common/alignment.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
|
@ -195,7 +196,6 @@ public:
|
|||
desc.inline_cbuf == existing.inline_cbuf;
|
||||
})};
|
||||
auto& buffer = buffer_resources[index];
|
||||
ASSERT(buffer.length == desc.length);
|
||||
buffer.used_types |= desc.used_types;
|
||||
buffer.is_written |= desc.is_written;
|
||||
return index;
|
||||
|
@ -227,7 +227,7 @@ public:
|
|||
return true;
|
||||
}
|
||||
// Samplers with different bindings might still be the same.
|
||||
return existing.GetSsharp(info) == desc.GetSsharp(info);
|
||||
return existing.GetSharp(info) == desc.GetSharp(info);
|
||||
})};
|
||||
return index;
|
||||
}
|
||||
|
@ -342,19 +342,6 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
};
|
||||
}
|
||||
|
||||
static u32 BufferLength(const AmdGpu::Buffer& buffer) {
|
||||
const auto stride = buffer.GetStride();
|
||||
if (stride < sizeof(f32)) {
|
||||
ASSERT(sizeof(f32) % stride == 0);
|
||||
return (((buffer.num_records - 1) / sizeof(f32)) + 1) * stride;
|
||||
} else if (stride == sizeof(f32)) {
|
||||
return buffer.num_records;
|
||||
} else {
|
||||
ASSERT(stride % sizeof(f32) == 0);
|
||||
return buffer.num_records * (stride / sizeof(f32));
|
||||
}
|
||||
}
|
||||
|
||||
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
||||
AmdGpu::Buffer& cbuf) {
|
||||
|
||||
|
@ -381,7 +368,6 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
|||
return descriptors.Add(BufferResource{
|
||||
.sgpr_base = std::numeric_limits<u32>::max(),
|
||||
.dword_offset = 0,
|
||||
.length = BufferLength(cbuf),
|
||||
.used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
|
||||
.inline_cbuf = cbuf,
|
||||
});
|
||||
|
@ -399,7 +385,6 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.length = BufferLength(buffer),
|
||||
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
|
||||
.is_written = IsBufferStore(inst),
|
||||
});
|
||||
|
|
|
@ -50,6 +50,9 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::ImageQueryLod:
|
||||
info.has_image_query = true;
|
||||
break;
|
||||
case IR::Opcode::LaneId:
|
||||
info.uses_lane_id = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
@ -12,10 +13,6 @@
|
|||
#include "shader_recompiler/ir/type.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||
}
|
||||
|
||||
namespace Shader {
|
||||
|
||||
static constexpr size_t NumUserDataRegs = 16;
|
||||
|
@ -78,31 +75,19 @@ struct Info;
|
|||
struct BufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
u32 length;
|
||||
IR::Type used_types;
|
||||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_instance_data{};
|
||||
bool is_written{};
|
||||
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
|
||||
bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
return buffer.GetSize() > MaxUboSize || is_written;
|
||||
}
|
||||
|
||||
u64 GetKey(const Info& info) const {
|
||||
const auto sharp = GetVsharp(info);
|
||||
u64 key = sharp.GetStride();
|
||||
if (!is_written) {
|
||||
key <<= 1;
|
||||
key |= IsStorage(sharp);
|
||||
}
|
||||
return key;
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
|
||||
using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
|
||||
|
||||
struct TextureBufferResource {
|
||||
u32 sgpr_base;
|
||||
|
@ -110,16 +95,9 @@ struct TextureBufferResource {
|
|||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_written{};
|
||||
|
||||
u64 GetKey(const Info& info) const {
|
||||
const auto sharp = GetVsharp(info);
|
||||
const bool is_integer = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Uint ||
|
||||
sharp.GetNumberFmt() == AmdGpu::NumberFormat::Sint;
|
||||
return is_integer;
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using TextureBufferResourceList = boost::container::static_vector<TextureBufferResource, 16>;
|
||||
using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;
|
||||
|
||||
struct ImageResource {
|
||||
u32 sgpr_base;
|
||||
|
@ -130,14 +108,9 @@ struct ImageResource {
|
|||
bool is_depth;
|
||||
bool is_atomic{};
|
||||
|
||||
u64 GetKey(const Info& info) const {
|
||||
const auto sharp = GetTsharp(info);
|
||||
return sharp.type;
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Image GetTsharp(const Info& info) const noexcept;
|
||||
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using ImageResourceList = boost::container::static_vector<ImageResource, 16>;
|
||||
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;
|
||||
|
||||
struct SamplerResource {
|
||||
u32 sgpr_base;
|
||||
|
@ -146,9 +119,9 @@ struct SamplerResource {
|
|||
u32 associated_image : 4;
|
||||
u32 disable_aniso : 1;
|
||||
|
||||
constexpr AmdGpu::Sampler GetSsharp(const Info& info) const noexcept;
|
||||
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
|
||||
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;
|
||||
|
||||
struct PushData {
|
||||
static constexpr size_t BufOffsetIndex = 2;
|
||||
|
@ -242,9 +215,10 @@ struct Info {
|
|||
bool has_discard{};
|
||||
bool has_image_gather{};
|
||||
bool has_image_query{};
|
||||
bool uses_lane_id{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_shared{};
|
||||
bool uses_fp16{true};
|
||||
bool uses_fp16{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
|
||||
|
@ -263,20 +237,6 @@ struct Info {
|
|||
return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
|
||||
}
|
||||
|
||||
u64 GetStageSpecializedKey(u32 binding = 0) const noexcept {
|
||||
u64 key = HashCombine(pgm_hash, binding);
|
||||
for (const auto& buffer : buffers) {
|
||||
key = HashCombine(key, buffer.GetKey(*this));
|
||||
}
|
||||
for (const auto& buffer : texture_buffers) {
|
||||
key = HashCombine(key, buffer.GetKey(*this));
|
||||
}
|
||||
for (const auto& image : images) {
|
||||
key = HashCombine(key, image.GetKey(*this));
|
||||
}
|
||||
return key;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
|
||||
u32 vertex_offset = 0;
|
||||
u32 instance_offset = 0;
|
||||
|
@ -290,19 +250,19 @@ struct Info {
|
|||
}
|
||||
};
|
||||
|
||||
constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexcept {
|
||||
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
||||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Buffer TextureBufferResource::GetVsharp(const Info& info) const noexcept {
|
||||
constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Image ImageResource::GetTsharp(const Info& info) const noexcept {
|
||||
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Sampler SamplerResource::GetSsharp(const Info& info) const noexcept {
|
||||
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
||||
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
|
|
|
@ -61,6 +61,10 @@ enum class NumberFormat : u32 {
|
|||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
|
||||
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
|
||||
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
|
@ -68,6 +69,10 @@ struct Buffer {
|
|||
return stride == 0 ? 1U : stride;
|
||||
}
|
||||
|
||||
u32 NumDwords() const noexcept {
|
||||
return Common::AlignUp(GetSize(), sizeof(u32)) >> 2;
|
||||
}
|
||||
|
||||
u32 GetSize() const noexcept {
|
||||
return GetStride() * num_records;
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
u32 binding{};
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
for (const auto& buffer : info->buffers) {
|
||||
const auto sharp = buffer.GetVsharp(*info);
|
||||
const auto sharp = buffer.GetSharp(*info);
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
|
||||
|
@ -107,17 +107,17 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
Shader::PushData push_data{};
|
||||
u32 binding{};
|
||||
|
||||
for (const auto& buffer : info->buffers) {
|
||||
const auto vsharp = buffer.GetVsharp(*info);
|
||||
const bool is_storage = buffer.IsStorage(vsharp);
|
||||
for (const auto& desc : info->buffers) {
|
||||
const auto vsharp = desc.GetSharp(*info);
|
||||
const bool is_storage = desc.IsStorage(vsharp);
|
||||
const VAddr address = vsharp.base_address;
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means we
|
||||
// can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will
|
||||
// need its full emulation anyways. For cases of metadata read a warning will be logged.
|
||||
if (is_storage) {
|
||||
if (desc.is_written) {
|
||||
if (texture_cache.TouchMeta(address, true)) {
|
||||
LOG_WARNING(Render_Vulkan, "Metadata update skipped");
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
|
@ -126,13 +126,12 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
}
|
||||
}
|
||||
const u32 size = vsharp.GetSize();
|
||||
if (buffer.is_written) {
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemory(address, size, true);
|
||||
}
|
||||
const u32 alignment =
|
||||
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, buffer.is_written);
|
||||
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, desc.is_written);
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
if (adjust != 0) {
|
||||
|
@ -151,18 +150,28 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
});
|
||||
}
|
||||
|
||||
for (const auto& tex_buffer : info->texture_buffers) {
|
||||
const auto vsharp = tex_buffer.GetVsharp(*info);
|
||||
for (const auto& desc : info->texture_buffers) {
|
||||
const auto vsharp = desc.GetSharp(*info);
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
|
||||
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
|
||||
const VAddr address = vsharp.base_address;
|
||||
const u32 size = vsharp.GetSize();
|
||||
if (tex_buffer.is_written) {
|
||||
if (desc.is_written) {
|
||||
if (texture_cache.TouchMeta(address, true)) {
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
}
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemory(address, size, true);
|
||||
}
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written);
|
||||
buffer_cache.ObtainBuffer(address, size, desc.is_written);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
|
||||
"Texel buffer stride must match format stride");
|
||||
|
@ -172,7 +181,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddOffset(binding, adjust / fmt_stride);
|
||||
}
|
||||
buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written,
|
||||
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
}
|
||||
set_writes.push_back({
|
||||
|
@ -180,19 +189,23 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
.dstBinding = binding++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.pTexelBufferView = &buffer_view,
|
||||
});
|
||||
}
|
||||
|
||||
for (const auto& image_desc : info->images) {
|
||||
const auto tsharp = image_desc.GetTsharp(*info);
|
||||
VideoCore::ImageInfo image_info{tsharp};
|
||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
||||
const auto tsharp = image_desc.GetSharp(*info);
|
||||
if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
|
||||
VideoCore::ImageInfo image_info{tsharp};
|
||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
||||
} else {
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding++,
|
||||
|
@ -208,7 +221,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
}
|
||||
}
|
||||
for (const auto& sampler : info->samplers) {
|
||||
const auto ssharp = sampler.GetSsharp(*info);
|
||||
const auto ssharp = sampler.GetSharp(*info);
|
||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
|
|
|
@ -307,7 +307,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||
continue;
|
||||
}
|
||||
for (const auto& buffer : stage->buffers) {
|
||||
const auto sharp = buffer.GetVsharp(*stage);
|
||||
const auto sharp = buffer.GetSharp(*stage);
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
|
||||
|
@ -356,7 +356,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
VideoCore::TextureCache& texture_cache) const {
|
||||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::BufferView, 8> buffer_views;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
Shader::PushData push_data{};
|
||||
|
@ -371,7 +371,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
push_data.step1 = regs.vgt_instance_step_rate_1;
|
||||
}
|
||||
for (const auto& buffer : stage->buffers) {
|
||||
const auto vsharp = buffer.GetVsharp(*stage);
|
||||
const auto vsharp = buffer.GetSharp(*stage);
|
||||
const bool is_storage = buffer.IsStorage(vsharp);
|
||||
if (vsharp) {
|
||||
const VAddr address = vsharp.base_address;
|
||||
|
@ -405,7 +405,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
}
|
||||
|
||||
for (const auto& tex_buffer : stage->texture_buffers) {
|
||||
const auto vsharp = tex_buffer.GetVsharp(*stage);
|
||||
const auto vsharp = tex_buffer.GetSharp(*stage);
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
|
||||
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
|
||||
const VAddr address = vsharp.base_address;
|
||||
|
@ -438,7 +438,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
|
||||
boost::container::static_vector<AmdGpu::Image, 16> tsharps;
|
||||
for (const auto& image_desc : stage->images) {
|
||||
const auto tsharp = image_desc.GetTsharp(*stage);
|
||||
const auto tsharp = image_desc.GetSharp(*stage);
|
||||
if (tsharp) {
|
||||
tsharps.emplace_back(tsharp);
|
||||
VideoCore::ImageInfo image_info{tsharp};
|
||||
|
@ -464,7 +464,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
|||
}
|
||||
}
|
||||
for (const auto& sampler : stage->samplers) {
|
||||
auto ssharp = sampler.GetSsharp(*stage);
|
||||
auto ssharp = sampler.GetSharp(*stage);
|
||||
if (sampler.disable_aniso) {
|
||||
const auto& tsharp = tsharps[sampler.associated_image];
|
||||
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
||||
|
|
|
@ -278,6 +278,7 @@ bool Instance::CreateDevice() {
|
|||
.depthBiasClamp = features.depthBiasClamp,
|
||||
.multiViewport = features.multiViewport,
|
||||
.samplerAnisotropy = features.samplerAnisotropy,
|
||||
.vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics,
|
||||
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
|
||||
.shaderImageGatherExtended = features.shaderImageGatherExtended,
|
||||
.shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,
|
||||
|
|
|
@ -1,122 +1,26 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/path_util.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_cache.h"
|
||||
|
||||
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::VsOutput;
|
||||
|
||||
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
||||
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
||||
w != VsOutput::None) {
|
||||
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
||||
}
|
||||
};
|
||||
// VS_OUT_MISC_VEC
|
||||
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
|
||||
ctl.use_vtx_edge_flag
|
||||
? VsOutput::EdgeFlag
|
||||
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
|
||||
ctl.use_vtx_kill_flag
|
||||
? VsOutput::KillFlag
|
||||
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
|
||||
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
|
||||
// VS_OUT_CCDIST0
|
||||
add_output(ctl.IsClipDistEnabled(0)
|
||||
? VsOutput::ClipDist0
|
||||
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(1)
|
||||
? VsOutput::ClipDist1
|
||||
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(2)
|
||||
? VsOutput::ClipDist2
|
||||
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(3)
|
||||
? VsOutput::ClipDist3
|
||||
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
|
||||
// VS_OUT_CCDIST1
|
||||
add_output(ctl.IsClipDistEnabled(4)
|
||||
? VsOutput::ClipDist4
|
||||
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(5)
|
||||
? VsOutput::ClipDist5
|
||||
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(6)
|
||||
? VsOutput::ClipDist6
|
||||
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(7)
|
||||
? VsOutput::ClipDist7
|
||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||
}
|
||||
|
||||
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, u64 pgm_base,
|
||||
u64 hash, const AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::Info info{};
|
||||
info.user_data = user_data;
|
||||
info.pgm_base = pgm_base;
|
||||
info.pgm_hash = hash;
|
||||
info.stage = stage;
|
||||
switch (stage) {
|
||||
case Shader::Stage::Vertex: {
|
||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
||||
BuildVsOutputs(info, regs.vs_output_control);
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||
info.ps_inputs.push_back({
|
||||
.param_index = regs.ps_inputs[i].input_offset.Value(),
|
||||
.is_default = bool(regs.ps_inputs[i].use_default),
|
||||
.is_flat = bool(regs.ps_inputs[i].flat_shade),
|
||||
.default_value = regs.ps_inputs[i].default_value,
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
const auto& cs_pgm = regs.cs_program;
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||
cs_pgm.num_thread_z.full};
|
||||
info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||
cs_pgm.IsTgidEnabled(2)};
|
||||
info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||
AmdGpu::Liverpool* liverpool_)
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
|
||||
block_pool{512} {
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
|
||||
shader_cache{std::make_unique<ShaderCache>(instance, liverpool)} {
|
||||
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = 0x00010600U,
|
||||
.subgroup_size = instance.SubgroupSize(),
|
||||
.support_explicit_workgroup_layout = true,
|
||||
};
|
||||
}
|
||||
|
||||
PipelineCache::~PipelineCache() = default;
|
||||
|
||||
const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
|
||||
const auto& regs = liverpool->regs;
|
||||
// Tessellation is unsupported so skip the draw to avoid locking up the driver.
|
||||
|
@ -257,7 +161,8 @@ void PipelineCache::RefreshGraphicsKey() {
|
|||
continue;
|
||||
}
|
||||
const auto stage = Shader::Stage{i};
|
||||
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(pgm, stage, binding);
|
||||
std::tie(infos[i], modules[i], key.stage_hashes[i]) =
|
||||
shader_cache->GetProgram(pgm, stage, binding);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -265,47 +170,7 @@ void PipelineCache::RefreshComputeKey() {
|
|||
u32 binding{};
|
||||
const auto* cs_pgm = &liverpool->regs.cs_program;
|
||||
std::tie(infos[0], modules[0], compute_key) =
|
||||
GetProgram(cs_pgm, Shader::Stage::Compute, binding);
|
||||
}
|
||||
|
||||
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span<const u32> code,
|
||||
size_t perm_idx, u32& binding) {
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
||||
perm_idx != 0 ? "(permutation)" : "");
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
|
||||
}
|
||||
|
||||
block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
const u64 key = info.GetStageSpecializedKey(binding);
|
||||
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
|
||||
}
|
||||
|
||||
// Create module and set name to hash in renderdoc
|
||||
const auto module = CompileSPV(spv, instance.GetDevice());
|
||||
ASSERT(module != VK_NULL_HANDLE);
|
||||
const auto name = fmt::format("{}_{:#x}_{}", info.stage, key, perm_idx);
|
||||
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
||||
return module;
|
||||
}
|
||||
|
||||
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||
size_t perm_idx, std::string_view ext) {
|
||||
using namespace Common::FS;
|
||||
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
|
||||
if (!std::filesystem::exists(dump_dir)) {
|
||||
std::filesystem::create_directories(dump_dir);
|
||||
}
|
||||
const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext);
|
||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||
file.WriteSpan(code);
|
||||
shader_cache->GetProgram(cs_pgm, Shader::Stage::Compute, binding);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -4,9 +4,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <tsl/robin_map.h>
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
|
||||
|
@ -18,15 +15,7 @@ namespace Vulkan {
|
|||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
|
||||
struct Program {
|
||||
using Module = std::pair<u64, vk::ShaderModule>;
|
||||
Shader::Info info;
|
||||
boost::container::small_vector<Module, 8> modules;
|
||||
};
|
||||
|
||||
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, u64 pgm_base,
|
||||
u64 hash, const AmdGpu::Liverpool::Regs& regs);
|
||||
class ShaderCache;
|
||||
|
||||
class PipelineCache {
|
||||
static constexpr size_t MaxShaderStages = 5;
|
||||
|
@ -34,7 +23,7 @@ class PipelineCache {
|
|||
public:
|
||||
explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
|
||||
AmdGpu::Liverpool* liverpool);
|
||||
~PipelineCache() = default;
|
||||
~PipelineCache();
|
||||
|
||||
const GraphicsPipeline* GetGraphicsPipeline();
|
||||
|
||||
|
@ -43,54 +32,6 @@ public:
|
|||
private:
|
||||
void RefreshGraphicsKey();
|
||||
void RefreshComputeKey();
|
||||
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
|
||||
std::string_view ext);
|
||||
|
||||
vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
|
||||
u32& binding);
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const auto* pgm,
|
||||
Shader::Stage stage,
|
||||
u32& binding) {
|
||||
// Fetch program for binaryinfo hash.
|
||||
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
|
||||
const u64 hash = bininfo->shader_hash;
|
||||
auto [it_pgm, new_program] = program_cache.try_emplace(hash);
|
||||
u64 stage_key{};
|
||||
if (new_program) {
|
||||
// Create a new program and a module with current runtime state.
|
||||
const VAddr pgm_base = pgm->template Address<VAddr>();
|
||||
auto program = program_pool.Create();
|
||||
program->info = MakeShaderInfo(stage, pgm->user_data, pgm_base, hash, liverpool->regs);
|
||||
u32 start_binding = binding;
|
||||
const auto module = CompileModule(program->info, pgm->Code(), 0, start_binding);
|
||||
stage_key = program->info.GetStageSpecializedKey(binding);
|
||||
program->modules.emplace_back(stage_key, module);
|
||||
it_pgm.value() = program;
|
||||
} else {
|
||||
stage_key = it_pgm->second->info.GetStageSpecializedKey(binding);
|
||||
}
|
||||
|
||||
Program* program = it_pgm->second;
|
||||
const auto& info = program->info;
|
||||
vk::ShaderModule module{};
|
||||
|
||||
// Compile specialized module with current runtime state.
|
||||
const auto it = std::ranges::find(program->modules, stage_key, &Program::Module::first);
|
||||
if (it == program->modules.end()) {
|
||||
auto new_info = MakeShaderInfo(stage, pgm->user_data, info.pgm_base, info.pgm_hash,
|
||||
liverpool->regs);
|
||||
const size_t perm_idx = program->modules.size();
|
||||
module = CompileModule(new_info, pgm->Code(), perm_idx, binding);
|
||||
program->modules.emplace_back(stage_key, module);
|
||||
} else {
|
||||
binding += info.NumBindings();
|
||||
module = it->second;
|
||||
}
|
||||
|
||||
const u64 full_hash = HashCombine(hash, stage_key);
|
||||
return std::make_tuple(&info, module, full_hash);
|
||||
}
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
@ -98,17 +39,13 @@ private:
|
|||
AmdGpu::Liverpool* liverpool;
|
||||
vk::UniquePipelineCache pipeline_cache;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
tsl::robin_map<size_t, Program*> program_cache;
|
||||
std::unique_ptr<ShaderCache> shader_cache;
|
||||
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
|
||||
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||
std::array<const Shader::Info*, MaxShaderStages> infos{};
|
||||
std::array<vk::ShaderModule, MaxShaderStages> modules{};
|
||||
Shader::Profile profile{};
|
||||
GraphicsPipelineKey graphics_key{};
|
||||
u64 compute_key{};
|
||||
Common::ObjectPool<Shader::IR::Inst> inst_pool;
|
||||
Common::ObjectPool<Shader::IR::Block> block_pool;
|
||||
Common::ObjectPool<Program> program_pool;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/path_util.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::VsOutput;
|
||||
|
||||
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
||||
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
||||
w != VsOutput::None) {
|
||||
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
||||
}
|
||||
};
|
||||
// VS_OUT_MISC_VEC
|
||||
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
|
||||
ctl.use_vtx_edge_flag
|
||||
? VsOutput::EdgeFlag
|
||||
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
|
||||
ctl.use_vtx_kill_flag
|
||||
? VsOutput::KillFlag
|
||||
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
|
||||
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
|
||||
// VS_OUT_CCDIST0
|
||||
add_output(ctl.IsClipDistEnabled(0)
|
||||
? VsOutput::ClipDist0
|
||||
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(1)
|
||||
? VsOutput::ClipDist1
|
||||
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(2)
|
||||
? VsOutput::ClipDist2
|
||||
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(3)
|
||||
? VsOutput::ClipDist3
|
||||
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
|
||||
// VS_OUT_CCDIST1
|
||||
add_output(ctl.IsClipDistEnabled(4)
|
||||
? VsOutput::ClipDist4
|
||||
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(5)
|
||||
? VsOutput::ClipDist5
|
||||
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(6)
|
||||
? VsOutput::ClipDist6
|
||||
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
|
||||
ctl.IsClipDistEnabled(7)
|
||||
? VsOutput::ClipDist7
|
||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||
}
|
||||
|
||||
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, u64 pgm_base,
|
||||
u64 hash, const AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::Info info{};
|
||||
info.user_data = user_data;
|
||||
info.pgm_base = pgm_base;
|
||||
info.pgm_hash = hash;
|
||||
info.stage = stage;
|
||||
switch (stage) {
|
||||
case Shader::Stage::Vertex: {
|
||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
||||
BuildVsOutputs(info, regs.vs_output_control);
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||
info.ps_inputs.push_back({
|
||||
.param_index = regs.ps_inputs[i].input_offset.Value(),
|
||||
.is_default = bool(regs.ps_inputs[i].use_default),
|
||||
.is_flat = bool(regs.ps_inputs[i].flat_shade),
|
||||
.default_value = regs.ps_inputs[i].default_value,
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
const auto& cs_pgm = regs.cs_program;
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||
cs_pgm.num_thread_z.full};
|
||||
info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||
cs_pgm.IsTgidEnabled(2)};
|
||||
info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
ShaderCache::ShaderCache(const Instance& instance_, AmdGpu::Liverpool* liverpool_)
|
||||
: instance{instance_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} {
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = 0x00010600U,
|
||||
.subgroup_size = instance.SubgroupSize(),
|
||||
.support_explicit_workgroup_layout = true,
|
||||
};
|
||||
}
|
||||
|
||||
vk::ShaderModule ShaderCache::CompileModule(Shader::Info& info, std::span<const u32> code,
|
||||
size_t perm_idx, u32& binding) {
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
||||
perm_idx != 0 ? "(permutation)" : "");
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
|
||||
}
|
||||
|
||||
block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
|
||||
}
|
||||
|
||||
// Create module and set name to hash in renderdoc
|
||||
const auto module = CompileSPV(spv, instance.GetDevice());
|
||||
ASSERT(module != VK_NULL_HANDLE);
|
||||
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
|
||||
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
||||
return module;
|
||||
}
|
||||
|
||||
void ShaderCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||
size_t perm_idx, std::string_view ext) {
|
||||
using namespace Common::FS;
|
||||
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
|
||||
if (!std::filesystem::exists(dump_dir)) {
|
||||
std::filesystem::create_directories(dump_dir);
|
||||
}
|
||||
const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext);
|
||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||
file.WriteSpan(code);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
|
@ -0,0 +1,184 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
#include "common/object_pool.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
|
||||
struct BufferSpecialization {
|
||||
u16 stride : 14;
|
||||
u16 is_storage : 1;
|
||||
|
||||
auto operator<=>(const BufferSpecialization&) const = default;
|
||||
};
|
||||
|
||||
struct TextureBufferSpecialization {
|
||||
bool is_integer;
|
||||
|
||||
auto operator<=>(const TextureBufferSpecialization&) const = default;
|
||||
};
|
||||
|
||||
struct ImageSpecialization {
|
||||
AmdGpu::ImageType type;
|
||||
bool is_integer;
|
||||
|
||||
auto operator<=>(const ImageSpecialization&) const = default;
|
||||
};
|
||||
|
||||
struct StageSpecialization {
|
||||
static constexpr size_t MaxStageResources = 32;
|
||||
|
||||
const Shader::Info* info;
|
||||
std::bitset<MaxStageResources> bitset{};
|
||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||
boost::container::small_vector<ImageSpecialization, 8> images;
|
||||
u32 start_binding{};
|
||||
|
||||
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
||||
for (const auto& desc : desc_list) {
|
||||
auto& spec = spec_list.emplace_back();
|
||||
const auto sharp = desc.GetSharp(*info);
|
||||
if (!sharp) {
|
||||
binding++;
|
||||
continue;
|
||||
}
|
||||
bitset.set(binding++);
|
||||
func(spec, desc, sharp);
|
||||
}
|
||||
}
|
||||
|
||||
StageSpecialization(const Shader::Info& info_, u32 start_binding_)
|
||||
: info{&info_}, start_binding{start_binding_} {
|
||||
u32 binding{};
|
||||
ForEachSharp(binding, buffers, info->buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.stride = sharp.GetStride();
|
||||
spec.is_storage = desc.IsStorage(sharp);
|
||||
});
|
||||
ForEachSharp(binding, tex_buffers, info->texture_buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
spec.type = sharp.GetType();
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
}
|
||||
|
||||
bool operator==(const StageSpecialization& other) const {
|
||||
if (start_binding != other.start_binding) {
|
||||
return false;
|
||||
}
|
||||
u32 binding{};
|
||||
for (u32 i = 0; i < buffers.size(); i++) {
|
||||
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
for (u32 i = 0; i < tex_buffers.size(); i++) {
|
||||
if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
for (u32 i = 0; i < images.size(); i++) {
|
||||
if (other.bitset[binding++] && images[i] != other.images[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct Program {
|
||||
struct Module {
|
||||
vk::ShaderModule module;
|
||||
StageSpecialization spec;
|
||||
};
|
||||
|
||||
Shader::Info info;
|
||||
boost::container::small_vector<Module, 8> modules;
|
||||
};
|
||||
|
||||
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, u64 pgm_base,
|
||||
u64 hash, const AmdGpu::Liverpool::Regs& regs);
|
||||
|
||||
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||
}
|
||||
|
||||
class ShaderCache {
|
||||
public:
|
||||
explicit ShaderCache(const Instance& instance, AmdGpu::Liverpool* liverpool);
|
||||
~ShaderCache() = default;
|
||||
|
||||
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
|
||||
std::string_view ext);
|
||||
|
||||
vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
|
||||
u32& binding);
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const auto* pgm,
|
||||
Shader::Stage stage,
|
||||
u32& binding) {
|
||||
// Fetch program for binaryinfo hash.
|
||||
const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm);
|
||||
const u64 hash = bininfo->shader_hash;
|
||||
auto [it_pgm, new_program] = program_cache.try_emplace(hash);
|
||||
u64 stage_key{};
|
||||
if (new_program) {
|
||||
const VAddr pgm_base = pgm->template Address<VAddr>();
|
||||
auto program = program_pool.Create();
|
||||
program->info = MakeShaderInfo(stage, pgm->user_data, pgm_base, hash, liverpool->regs);
|
||||
u32 start_binding = binding;
|
||||
const auto module = CompileModule(program->info, pgm->Code(), 0, binding);
|
||||
program->modules.emplace_back(module,
|
||||
StageSpecialization{program->info, start_binding});
|
||||
it_pgm.value() = program;
|
||||
return std::make_tuple(&program->info, module, HashCombine(hash, 0));
|
||||
}
|
||||
|
||||
Program* program = it_pgm->second;
|
||||
const auto& info = program->info;
|
||||
size_t perm_idx = program->modules.size();
|
||||
StageSpecialization spec{info, binding};
|
||||
vk::ShaderModule module{};
|
||||
|
||||
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
||||
if (it == program->modules.end()) {
|
||||
auto new_info = MakeShaderInfo(stage, pgm->user_data, info.pgm_base, info.pgm_hash,
|
||||
liverpool->regs);
|
||||
module = CompileModule(new_info, pgm->Code(), perm_idx, binding);
|
||||
program->modules.emplace_back(module, std::move(spec));
|
||||
} else {
|
||||
binding += info.NumBindings();
|
||||
module = it->module;
|
||||
perm_idx = std::distance(program->modules.begin(), it);
|
||||
}
|
||||
return std::make_tuple(&info, module, HashCombine(hash, perm_idx));
|
||||
}
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
AmdGpu::Liverpool* liverpool;
|
||||
Shader::Profile profile{};
|
||||
tsl::robin_map<size_t, Program*> program_cache;
|
||||
Common::ObjectPool<Shader::IR::Inst> inst_pool;
|
||||
Common::ObjectPool<Shader::IR::Block> block_pool;
|
||||
Common::ObjectPool<Program> program_pool;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
Loading…
Reference in New Issue