shader_recompiler: Implement shared memory load/store

This commit is contained in:
IndecisiveTurtle 2024-07-02 02:04:45 +03:00
parent 668f7673d9
commit e8a2cb7474
19 changed files with 410 additions and 70 deletions

25
src/common/div_ceil.h Executable file
View File

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <type_traits>
namespace Common {
/// Ceiled integer division.
template <typename N, typename D>
requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
}
/// Ceiled integer division with logarithmic divisor in base 2
template <typename N, typename D>
requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) {
return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2);
}
} // namespace Common

View File

@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
const auto semantics{
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
spv::MemorySemanticsMask::ImageMemory};
ctx.OpMemoryBarrier(ctx.ConstU32(static_cast<u32>(scope)), ctx.ConstU32(static_cast<u32>(semantics)));
}
} // Anonymous namespace
void EmitBarrier(EmitContext& ctx) {
const auto execution{spv::Scope::Workgroup};
const auto memory{spv::Scope::Workgroup};
const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
spv::MemorySemanticsMask::WorkgroupMemory};
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
ctx.ConstU32(static_cast<u32>(memory)),
ctx.ConstU32(static_cast<u32>(memory_semantics)));
}
void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
MemoryBarrier(ctx, spv::Scope::Workgroup);
}
void EmitDeviceMemoryBarrier(EmitContext& ctx) {
MemoryBarrier(ctx, spv::Scope::Device);
}
} // namespace Shader::Backend::SPIRV

View File

@ -68,11 +68,7 @@ Id EmitConvertS32F16(EmitContext& ctx, Id value) {
} }
Id EmitConvertS32F32(EmitContext& ctx, Id value) { Id EmitConvertS32F32(EmitContext& ctx, Id value) {
if (ctx.profile.has_broken_signed_operations) {
return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
} else {
return ctx.OpConvertFToS(ctx.U32[1], value); return ctx.OpConvertFToS(ctx.U32[1], value);
}
} }
Id EmitConvertS32F64(EmitContext& ctx, Id value) { Id EmitConvertS32F64(EmitContext& ctx, Id value) {

View File

@ -43,6 +43,9 @@ void EmitSetVccHi(EmitContext& ctx);
void EmitPrologue(EmitContext& ctx); void EmitPrologue(EmitContext& ctx);
void EmitEpilogue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx);
void EmitDiscard(EmitContext& ctx); void EmitDiscard(EmitContext& ctx);
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg); Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitGetThreadBitScalarReg(EmitContext& ctx); void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx); void EmitSetThreadBitScalarReg(EmitContext& ctx);
@ -82,12 +85,13 @@ Id EmitUndefU8(EmitContext& ctx);
Id EmitUndefU16(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx);
Id EmitUndefU32(EmitContext& ctx); Id EmitUndefU32(EmitContext& ctx);
Id EmitUndefU64(EmitContext& ctx); Id EmitUndefU64(EmitContext& ctx);
Id EmitReadSharedU8(EmitContext& ctx, Id offset); Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
Id EmitReadSharedS8(EmitContext& ctx, Id offset); Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
Id EmitReadSharedU16(EmitContext& ctx, Id offset); Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
Id EmitReadSharedS16(EmitContext& ctx, Id offset); Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
Id EmitReadSharedU32(EmitContext& ctx, Id offset); Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
Id EmitReadSharedU64(EmitContext& ctx, Id offset); Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);

View File

@ -0,0 +1,165 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
const Id shift_id{ctx.ConstU32(shift)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
}
Id Word(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
return ctx.OpLoad(ctx.U32[1], pointer);
}
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))};
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))};
const Id count_id{ctx.ConstU32(count)};
return {bit, count_id};
}
} // Anonymous namespace
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
} else {
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
}
}
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
return ctx.OpLoad(ctx.U32[1], pointer);
} else {
return Word(ctx, offset);
}
}
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
return ctx.OpLoad(ctx.U32[2], pointer);
} else {
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
}
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
return ctx.OpLoad(ctx.U32[4], pointer);
}
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
std::array<Id, 4> values{};
for (u32 i = 0; i < 4; ++i) {
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
}
return ctx.OpCompositeConstruct(ctx.U32[4], values);
}
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
const Id pointer{
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
}
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
}
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
Id pointer{};
if (ctx.profile.support_explicit_workgroup_layout) {
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
} else {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
}
ctx.OpStore(pointer, value);
}
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
}
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
if (ctx.profile.support_explicit_workgroup_layout) {
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
ctx.OpStore(pointer, value);
return;
}
const Id shift{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
for (u32 i = 0; i < 4; ++i) {
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
}
}
} // namespace Shader::Backend::SPIRV

View File

@ -3,6 +3,7 @@
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include <fmt/format.h> #include <fmt/format.h>
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV { namespace Shader::Backend::SPIRV {
@ -41,8 +42,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
AddCapability(spv::Capability::Shader); AddCapability(spv::Capability::Shader);
DefineArithmeticTypes(); DefineArithmeticTypes();
DefineInterfaces(program); DefineInterfaces(program);
DefineBuffers(program.info); DefineBuffers(info);
DefineImagesAndSamplers(program.info); DefineImagesAndSamplers(info);
DefineSharedMemory(info);
} }
EmitContext::~EmitContext() = default; EmitContext::~EmitContext() = default;
@ -358,4 +360,50 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
} }
} }
void EmitContext::DefineSharedMemory(const Info& info) {
if (info.shared_memory_size == 0) {
return;
}
const auto make{[&](Id element_type, u32 element_size) {
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
const Id struct_type{TypeStruct(array_type)};
MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
Decorate(struct_type, spv::Decoration::Block);
const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
Decorate(variable, spv::Decoration::Aliased);
interfaces.push_back(variable);
return std::make_tuple(variable, element_pointer, pointer);
}};
if (profile.support_explicit_workgroup_layout) {
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
if (info.uses_shared_u8) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
}
if (info.uses_shared_u16) {
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
}
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
return;
}
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
interfaces.push_back(shared_memory_u32);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -147,6 +147,12 @@ public:
Id u32_zero_value{}; Id u32_zero_value{};
Id f32_zero_value{}; Id f32_zero_value{};
Id shared_u8{};
Id shared_u16{};
Id shared_u32{};
Id shared_u32x2{};
Id shared_u32x4{};
Id input_u32{}; Id input_u32{};
Id input_f32{}; Id input_f32{};
Id input_s32{}; Id input_s32{};
@ -169,6 +175,14 @@ public:
Id subgroup_local_invocation_id{}; Id subgroup_local_invocation_id{};
Id image_u32{}; Id image_u32{};
Id shared_memory_u8{};
Id shared_memory_u16{};
Id shared_memory_u32{};
Id shared_memory_u32x2{};
Id shared_memory_u32x4{};
Id shared_memory_u32_type{};
struct TextureDefinition { struct TextureDefinition {
Id id; Id id;
Id sampled_type; Id sampled_type;
@ -206,6 +220,7 @@ private:
void DefineOutputs(const Info& info); void DefineOutputs(const Info& info);
void DefineBuffers(const Info& info); void DefineBuffers(const Info& info);
void DefineImagesAndSamplers(const Info& info); void DefineImagesAndSamplers(const Info& info);
void DefineSharedMemory(const Info& info);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
}; };

View File

@ -22,16 +22,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) { if (is_pair) {
// Pair loads are either 32 or 64-bit. We assume 32-bit for now.
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.SetVectorReg(dst_reg, ir.ReadShared(32, is_signed, addr0)); ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.SetVectorReg(dst_reg + 1, ir.ReadShared(32, is_signed, addr1)); ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
} else if (bit_size == 64) { } else if (bit_size == 64) {
const IR::Value data = ir.UnpackUint2x32(ir.ReadShared(bit_size, is_signed, addr)); const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)}); ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
} else { } else {
const IR::U32 data = ir.ReadShared(bit_size, is_signed, addr); const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)};
ir.SetVectorReg(dst_reg, data); ir.SetVectorReg(dst_reg, data);
} }
} }
@ -41,17 +43,22 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
const IR::VectorReg data0{inst.src[1].code}; const IR::VectorReg data0{inst.src[1].code};
const IR::VectorReg data1{inst.src[2].code}; const IR::VectorReg data1{inst.src[2].code};
if (is_pair) { if (is_pair) {
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.WriteShared(32, ir.GetVectorReg(data0), addr0); ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.WriteShared(32, ir.GetVectorReg(data1), addr1); ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
} else if (bit_size == 64) { } else if (bit_size == 64) {
const IR::U64 data = ir.PackUint2x32( const IR::Value data = ir.CompositeConstruct(ir.GetVectorReg(data0),
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1))); ir.GetVectorReg(data0 + 1));
ir.WriteShared(bit_size, data, addr); ir.WriteShared(bit_size, data, addr);
} else { } else {
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr);
} }
} }
void Translator::S_BARRIER() {
ir.Barrier();
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -466,6 +466,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_CMP_LT_U32: case Opcode::S_CMP_LT_U32:
translator.S_CMP(ConditionOp::LT, false, inst); translator.S_CMP(ConditionOp::LT, false, inst);
break; break;
case Opcode::S_CMP_LE_U32:
translator.S_CMP(ConditionOp::LE, false, inst);
break;
case Opcode::S_CMP_LG_U32: case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst); translator.S_CMP(ConditionOp::LG, false, inst);
break; break;
@ -816,9 +819,24 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_GET_RESINFO: case Opcode::IMAGE_GET_RESINFO:
translator.IMAGE_GET_RESINFO(inst); translator.IMAGE_GET_RESINFO(inst);
break; break;
case Opcode::S_BARRIER:
translator.S_BARRIER();
break;
case Opcode::S_TTRACEDATA: case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
break; break;
case Opcode::DS_READ_B32:
translator.DS_READ(32, false, false, inst);
break;
case Opcode::DS_READ2_B32:
translator.DS_READ(32, false, true, inst);
break;
case Opcode::DS_WRITE_B32:
translator.DS_WRITE(32, false, false, inst);
break;
case Opcode::DS_WRITE2_B32:
translator.DS_WRITE(32, false, true, inst);
break;
case Opcode::S_NOP: case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0: case Opcode::S_CBRANCH_SCC0:

View File

@ -165,6 +165,7 @@ public:
void DS_SWIZZLE_B32(const GcnInst& inst); void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void S_BARRIER();
// MIMG // MIMG
void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_GET_RESINFO(const GcnInst& inst);

View File

@ -115,6 +115,18 @@ void IREmitter::Discard() {
Inst(Opcode::Discard); Inst(Opcode::Discard);
} }
void IREmitter::Barrier() {
Inst(Opcode::Barrier);
}
void IREmitter::WorkgroupMemoryBarrier() {
Inst(Opcode::WorkgroupMemoryBarrier);
}
void IREmitter::DeviceMemoryBarrier() {
Inst(Opcode::DeviceMemoryBarrier);
}
U32 IREmitter::GetUserData(IR::ScalarReg reg) { U32 IREmitter::GetUserData(IR::ScalarReg reg) {
return Inst<U32>(Opcode::GetUserData, reg); return Inst<U32>(Opcode::GetUserData, reg);
} }
@ -240,22 +252,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp)); Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
} }
U32U64 IREmitter::ReadShared(int bit_size, bool is_signed, const U32& offset) { Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
/*switch (bit_size) { switch (bit_size) {
case 8: case 8:
return Inst<U32>(is_signed ? Opcode::ReadSharedS8 : Opcode::ReadSharedU8, offset); return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
case 16: case 16:
return Inst<U32>(is_signed ? Opcode::ReadSharedS16 : Opcode::ReadSharedU16, offset); return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
case 32: case 32:
return Inst<U32>(Opcode::ReadSharedU32, offset); return Inst<U32>(Opcode::LoadSharedU32, offset);
case 64: case 64:
return Inst<U64>(Opcode::ReadSharedU64, offset); return Inst<U64>(Opcode::LoadSharedU64, offset);
case 128:
return Inst(Opcode::LoadSharedU128, offset);
} }
UNREACHABLE_MSG("Invalid bit size {}", bit_size);*/ UNREACHABLE_MSG("Invalid bit size {}", bit_size);
} }
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
/*switch (bit_size) { switch (bit_size) {
case 8: case 8:
Inst(Opcode::WriteSharedU8, offset, value); Inst(Opcode::WriteSharedU8, offset, value);
break; break;
@ -268,9 +282,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
case 64: case 64:
Inst(Opcode::WriteSharedU64, offset, value); Inst(Opcode::WriteSharedU64, offset, value);
break; break;
case 128:
Inst(Opcode::WriteSharedU128, offset, value);
break;
default: default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size); throw InvalidArgument("Invalid bit size {}", bit_size);
}*/ }
} }
U32 IREmitter::ReadConst(const Value& base, const U32& offset) { U32 IREmitter::ReadConst(const Value& base, const U32& offset) {

View File

@ -43,6 +43,10 @@ public:
void Epilogue(); void Epilogue();
void Discard(); void Discard();
void Barrier();
void WorkgroupMemoryBarrier();
void DeviceMemoryBarrier();
[[nodiscard]] U32 GetUserData(IR::ScalarReg reg); [[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
[[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg); [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value); void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);
@ -74,7 +78,7 @@ public:
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset); [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);

View File

@ -19,6 +19,25 @@ OPCODE(ReadConst, U32, U32x
OPCODE(ReadConstBuffer, F32, Opaque, U32, ) OPCODE(ReadConstBuffer, F32, Opaque, U32, )
OPCODE(ReadConstBufferU32, U32, Opaque, U32, ) OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
// Barriers
OPCODE(Barrier, Void, )
OPCODE(WorkgroupMemoryBarrier, Void, )
OPCODE(DeviceMemoryBarrier, Void, )
// Shared memory operations
OPCODE(LoadSharedU8, U32, U32, )
OPCODE(LoadSharedS8, U32, U32, )
OPCODE(LoadSharedU16, U32, U32, )
OPCODE(LoadSharedS16, U32, U32, )
OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U32x2, U32, )
OPCODE(LoadSharedU128, U32x4, U32, )
OPCODE(WriteSharedU8, Void, U32, U32, )
OPCODE(WriteSharedU16, Void, U32, U32, )
OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U32x2, )
OPCODE(WriteSharedU128, Void, U32, U32x4, )
// Context getters/setters // Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, ) OPCODE(GetUserData, U32, ScalarReg, )
OPCODE(GetThreadBitScalarReg, U1, ScalarReg, ) OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )

View File

@ -16,6 +16,16 @@ void Visit(Info& info, IR::Inst& inst) {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break; break;
} }
case IR::Opcode::LoadSharedS8:
case IR::Opcode::LoadSharedU8:
case IR::Opcode::WriteSharedU8:
info.uses_shared_u8 = true;
break;
case IR::Opcode::LoadSharedS16:
case IR::Opcode::LoadSharedU16:
case IR::Opcode::WriteSharedU16:
info.uses_shared_u16 = true;
break;
case IR::Opcode::QuadShuffle: case IR::Opcode::QuadShuffle:
info.uses_group_quad = true; info.uses_group_quad = true;
break; break;

View File

@ -26,44 +26,9 @@ struct Profile {
bool support_fp32_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{};
bool support_explicit_workgroup_layout{}; bool support_explicit_workgroup_layout{};
bool support_vote{};
bool support_viewport_mask{};
bool support_typeless_image_loads{};
bool support_derivative_control{};
bool support_geometry_shader_passthrough{};
bool support_native_ndc{};
bool support_scaled_attributes{};
bool support_multi_viewport{};
bool support_geometry_streams{};
bool warp_size_potentially_larger_than_guest{};
bool lower_left_origin_mode{};
/// Fragment outputs have to be declared even if they are not written to avoid undefined values.
/// See Ori and the Blind Forest's main menu for reference.
bool need_declared_frag_colors{};
/// Prevents fast math optimizations that may cause inaccuracies
bool need_fastmath_off{};
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};
/// The Position builtin needs to be wrapped in a struct when used as an input bool lower_left_origin_mode{};
bool has_broken_spirv_position_input{};
/// Offset image operands with an unsigned type do not work
bool has_broken_unsigned_image_offsets{};
/// Signed instructions with unsigned data types are misinterpreted
bool has_broken_signed_operations{};
/// Float controls break when fp16 is enabled
bool has_broken_fp16_float_controls{};
/// Ignores SPIR-V ordered vs unordered using GLSL semantics
bool ignore_nan_fp_comparisons{};
/// Maxwell and earlier nVidia architectures have broken robust support
bool has_broken_robust{};
u64 min_ssbo_alignment{}; u64 min_ssbo_alignment{};
u32 max_user_clip_distances{};
}; };
} // namespace Shader } // namespace Shader

View File

@ -134,7 +134,10 @@ struct Info {
std::span<const u32> user_data; std::span<const u32> user_data;
Stage stage; Stage stage;
u32 shared_memory_size{};
bool uses_group_quad{}; bool uses_group_quad{};
bool uses_shared_u8{};
bool uses_shared_u16{};
bool translation_failed{}; // indicates that shader has unsupported instructions bool translation_failed{}; // indicates that shader has unsupported instructions
template <typename T> template <typename T>

View File

@ -121,6 +121,7 @@ struct Liverpool {
BitField<0, 6, u64> num_vgprs; BitField<0, 6, u64> num_vgprs;
BitField<6, 4, u64> num_sgprs; BitField<6, 4, u64> num_sgprs;
BitField<33, 5, u64> num_user_regs; BitField<33, 5, u64> num_user_regs;
BitField<47, 9, u64> lds_dwords;
} settings; } settings;
INSERT_PADDING_WORDS(1); INSERT_PADDING_WORDS(1);
u32 resource_limits; u32 resource_limits;
@ -133,6 +134,11 @@ struct Liverpool {
return reinterpret_cast<const T*>(addr); return reinterpret_cast<const T*>(addr);
} }
u32 SharedMemSize() const noexcept {
// lds_dwords is in units of 128 dwords. We return bytes.
return settings.lds_dwords.Value() * 128 * 4;
}
std::span<const u32> Code() const { std::span<const u32> Code() const {
const u32* code = Address<u32>(); const u32* code = Address<u32>();
BinaryInfo bininfo; BinaryInfo bininfo;

View File

@ -45,6 +45,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
info.num_user_data = cs_pgm.settings.num_user_regs; info.num_user_data = cs_pgm.settings.num_user_regs;
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full}; cs_pgm.num_thread_z.full};
info.shared_memory_size = cs_pgm.SharedMemSize();
break; break;
} }
default: default:
@ -60,6 +61,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
profile = Shader::Profile{ profile = Shader::Profile{
.supported_spirv = 0x00010600U, .supported_spirv = 0x00010600U,
.support_explicit_workgroup_layout = true,
}; };
} }

View File

@ -179,19 +179,18 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR8Unorm: case vk::Format::eR8Unorm:
return vk::Format::eR8Uint; return vk::Format::eR8Uint;
case vk::Format::eR8G8Unorm: case vk::Format::eR8G8Unorm:
case vk::Format::eR16Sfloat:
return vk::Format::eR8G8Uint; return vk::Format::eR8G8Uint;
case vk::Format::eR8G8B8A8Srgb: case vk::Format::eR8G8B8A8Srgb:
[[fallthrough]];
case vk::Format::eB8G8R8A8Srgb: case vk::Format::eB8G8R8A8Srgb:
[[fallthrough]];
case vk::Format::eB8G8R8A8Unorm: case vk::Format::eB8G8R8A8Unorm:
[[fallthrough]];
case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eR32Uint:
return vk::Format::eR32Uint; return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock: case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eR32G32Sfloat:
return vk::Format::eR32G32Uint; return vk::Format::eR32G32Uint;
case vk::Format::eBc3SrgbBlock: case vk::Format::eBc3SrgbBlock:
[[fallthrough]];
case vk::Format::eBc3UnormBlock: case vk::Format::eBc3UnormBlock:
case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock: case vk::Format::eBc7UnormBlock: