shader_recompiler: Implement shared memory load/store

parent 668f7673d9
commit e8a2cb7474
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <cstddef>
+#include <type_traits>
+
+namespace Common {
+
+/// Ceiled integer division.
+template <typename N, typename D>
+    requires std::is_integral_v<N> && std::is_unsigned_v<D>
+[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
+    return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
+}
+
+/// Ceiled integer division with logarithmic divisor in base 2
+template <typename N, typename D>
+    requires std::is_integral_v<N> && std::is_unsigned_v<D>
+[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) {
+    return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2);
+}
+
+} // namespace Common
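Note: a quick standalone sanity check of what the two helpers above compute (illustrative only, not part of the commit):

#include <cstdint>

// Mirrors Common::DivCeil and Common::DivCeilLog2 for concrete values.
constexpr std::uint32_t DivCeil(std::uint32_t number, std::uint32_t divisor) {
    return (number + divisor - 1) / divisor;
}
constexpr std::uint32_t DivCeilLog2(std::uint32_t value, std::uint32_t log2) {
    return (value + (1u << log2) - 1) >> log2;
}
static_assert(DivCeil(10u, 4u) == 3);      // 10 / 4 rounded up
static_assert(DivCeil(6144u, 4u) == 1536); // bytes -> u32 element count
static_assert(DivCeilLog2(10u, 2u) == 3);  // same as DivCeil with divisor 1 << 2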
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
+    const auto semantics{
+        spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
+        spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
+        spv::MemorySemanticsMask::ImageMemory};
+    ctx.OpMemoryBarrier(ctx.ConstU32(static_cast<u32>(scope)), ctx.ConstU32(static_cast<u32>(semantics)));
+}
+} // Anonymous namespace
+
+void EmitBarrier(EmitContext& ctx) {
+    const auto execution{spv::Scope::Workgroup};
+    const auto memory{spv::Scope::Workgroup};
+    const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
+                                spv::MemorySemanticsMask::WorkgroupMemory};
+    ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
+                         ctx.ConstU32(static_cast<u32>(memory)),
+                         ctx.ConstU32(static_cast<u32>(memory_semantics)));
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+    MemoryBarrier(ctx, spv::Scope::Workgroup);
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+    MemoryBarrier(ctx, spv::Scope::Device);
+}
+
+} // namespace Shader::Backend::SPIRV
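Note: OpControlBarrier makes every invocation in the execution scope wait at the barrier and can additionally order memory, while OpMemoryBarrier only orders memory. As a cross-check of the masks built above, per my reading of the SPIR-V spec (bit values are from the spec, not from this commit):

#include <cstdint>

// SPIR-V MemorySemantics bits used by the hunk above.
enum : std::uint32_t {
    AcquireRelease      = 0x008,
    UniformMemory       = 0x040,
    WorkgroupMemory     = 0x100,
    AtomicCounterMemory = 0x400,
    ImageMemory         = 0x800,
};
// MemoryBarrier() requests ordering across all memory classes:
static_assert((AcquireRelease | UniformMemory | WorkgroupMemory |
               AtomicCounterMemory | ImageMemory) == 0xD48);
// EmitBarrier() only needs workgroup (LDS) ordering:
static_assert((AcquireRelease | WorkgroupMemory) == 0x108);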
@@ -68,11 +68,7 @@ Id EmitConvertS32F16(EmitContext& ctx, Id value) {
 }
 
 Id EmitConvertS32F32(EmitContext& ctx, Id value) {
-    if (ctx.profile.has_broken_signed_operations) {
-        return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
-    } else {
-        return ctx.OpConvertFToS(ctx.U32[1], value);
-    }
+    return ctx.OpConvertFToS(ctx.U32[1], value);
 }
 
 Id EmitConvertS32F64(EmitContext& ctx, Id value) {
@@ -43,6 +43,9 @@ void EmitSetVccHi(EmitContext& ctx);
 void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
 void EmitDiscard(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
 Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
 void EmitGetThreadBitScalarReg(EmitContext& ctx);
 void EmitSetThreadBitScalarReg(EmitContext& ctx);
@@ -82,12 +85,13 @@ Id EmitUndefU8(EmitContext& ctx);
 Id EmitUndefU16(EmitContext& ctx);
 Id EmitUndefU32(EmitContext& ctx);
 Id EmitUndefU64(EmitContext& ctx);
-Id EmitReadSharedU8(EmitContext& ctx, Id offset);
-Id EmitReadSharedS8(EmitContext& ctx, Id offset);
-Id EmitReadSharedU16(EmitContext& ctx, Id offset);
-Id EmitReadSharedS16(EmitContext& ctx, Id offset);
-Id EmitReadSharedU32(EmitContext& ctx, Id offset);
-Id EmitReadSharedU64(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
 void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
 void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
 void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
@@ -0,0 +1,165 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
+    const Id shift_id{ctx.ConstU32(shift)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
+}
+
+Id Word(EmitContext& ctx, Id offset) {
+    const Id shift_id{ctx.ConstU32(2U)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
+    const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))};
+    const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))};
+    const Id count_id{ctx.ConstU32(count)};
+    return {bit, count_id};
+}
+} // Anonymous namespace
+
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
+        return ctx.OpLoad(ctx.U32[1], pointer);
+    } else {
+        return Word(ctx, offset);
+    }
+}
+
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+        return ctx.OpLoad(ctx.U32[2], pointer);
+    } else {
+        const Id shift_id{ctx.ConstU32(2U)};
+        const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+        const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
+        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
+        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+        return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+                                        ctx.OpLoad(ctx.U32[1], rhs_pointer));
+    }
+}
+
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+        return ctx.OpLoad(ctx.U32[4], pointer);
+    }
+    const Id shift_id{ctx.ConstU32(2U)};
+    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    std::array<Id, 4> values{};
+    for (u32 i = 0; i < 4; ++i) {
+        const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
+        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+        values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+    }
+    return ctx.OpCompositeConstruct(ctx.U32[4], values);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
+    const Id pointer{
+        ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+    ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
+    const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+    ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
+    Id pointer{};
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
+    } else {
+        const Id shift{ctx.ConstU32(2U)};
+        const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+        pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
+    }
+    ctx.OpStore(pointer, value);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+        ctx.OpStore(pointer, value);
+        return;
+    }
+    const Id shift{ctx.ConstU32(2U)};
+    const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+    const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
+    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
+    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
+    ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+    ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+        ctx.OpStore(pointer, value);
+        return;
+    }
+    const Id shift{ctx.ConstU32(2U)};
+    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+    for (u32 i = 0; i < 4; ++i) {
+        const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(i))};
+        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+        ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+    }
+}
+
+} // namespace Shader::Backend::SPIRV
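Note: on the fallback path (no SPV_KHR_workgroup_memory_explicit_layout), sub-word accesses are emulated on a single u32 array: offset >> 2 selects the containing word, and (offset * 8) masked with 24 or 16 gives the bit position handed to OpBitFieldUExtract/SExtract. A CPU model of the same arithmetic (a sketch; the helper names are mine):

#include <cstdint>

std::uint32_t LoadSharedU8(const std::uint32_t* lds, std::uint32_t offset) {
    const std::uint32_t word = lds[offset >> 2]; // Word(ctx, offset)
    const std::uint32_t bit = (offset * 8) & 24; // ExtractArgs(ctx, offset, 24, 8)
    return (word >> bit) & 0xFF;                 // OpBitFieldUExtract of 8 bits
}

std::uint32_t LoadSharedU16(const std::uint32_t* lds, std::uint32_t offset) {
    const std::uint32_t word = lds[offset >> 2];
    const std::uint32_t bit = (offset * 8) & 16; // ExtractArgs(ctx, offset, 16, 16)
    return (word >> bit) & 0xFFFF;
}
// e.g. offset 7: word 1, bit (56 & 24) == 24, i.e. the top byte of lds[1].
// The signed variants do the same with a sign-extending extract.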
@@ -3,6 +3,7 @@
 
 #include <boost/container/static_vector.hpp>
 #include <fmt/format.h>
+#include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 
 namespace Shader::Backend::SPIRV {
@@ -41,8 +42,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
     AddCapability(spv::Capability::Shader);
     DefineArithmeticTypes();
    DefineInterfaces(program);
-    DefineBuffers(program.info);
-    DefineImagesAndSamplers(program.info);
+    DefineBuffers(info);
+    DefineImagesAndSamplers(info);
+    DefineSharedMemory(info);
 }
 
 EmitContext::~EmitContext() = default;
@@ -358,4 +360,50 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
     }
 }
 
+void EmitContext::DefineSharedMemory(const Info& info) {
+    if (info.shared_memory_size == 0) {
+        return;
+    }
+    const auto make{[&](Id element_type, u32 element_size) {
+        const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
+        const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
+        Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+        const Id struct_type{TypeStruct(array_type)};
+        MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
+        Decorate(struct_type, spv::Decoration::Block);
+
+        const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
+        const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
+        const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
+        Decorate(variable, spv::Decoration::Aliased);
+        interfaces.push_back(variable);
+
+        return std::make_tuple(variable, element_pointer, pointer);
+    }};
+    if (profile.support_explicit_workgroup_layout) {
+        AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
+        AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
+        if (info.uses_shared_u8) {
+            AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
+            std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
+        }
+        if (info.uses_shared_u16) {
+            AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
+            std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
+        }
+        std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
+        std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
+        std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
+        return;
+    }
+    const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
+    const Id type{TypeArray(U32[1], ConstU32(num_elements))};
+    shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
+
+    shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
+    shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
+    interfaces.push_back(shared_memory_u32);
+}
+
 } // namespace Shader::Backend::SPIRV
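Note: with the explicit-layout path every typed array aliases the same shared_memory_size bytes (hence the Aliased decoration and the matching ArrayStride), so each view's element count is just the byte size divided by the element size, rounded up. For a hypothetical 6144-byte allocation (my example value, not from the commit):

constexpr unsigned DivCeil(unsigned n, unsigned d) { return (n + d - 1) / d; }

static_assert(DivCeil(6144, 1)  == 6144); // u8 view
static_assert(DivCeil(6144, 2)  == 3072); // u16 view
static_assert(DivCeil(6144, 4)  == 1536); // u32 view
static_assert(DivCeil(6144, 8)  == 768);  // u32x2 view
static_assert(DivCeil(6144, 16) == 384);  // u32x4 view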
@@ -147,6 +147,12 @@ public:
     Id u32_zero_value{};
     Id f32_zero_value{};
 
+    Id shared_u8{};
+    Id shared_u16{};
+    Id shared_u32{};
+    Id shared_u32x2{};
+    Id shared_u32x4{};
+
     Id input_u32{};
     Id input_f32{};
     Id input_s32{};
@@ -169,6 +175,14 @@ public:
     Id subgroup_local_invocation_id{};
     Id image_u32{};
 
+    Id shared_memory_u8{};
+    Id shared_memory_u16{};
+    Id shared_memory_u32{};
+    Id shared_memory_u32x2{};
+    Id shared_memory_u32x4{};
+
+    Id shared_memory_u32_type{};
+
     struct TextureDefinition {
         Id id;
         Id sampled_type;
@@ -206,6 +220,7 @@ private:
     void DefineOutputs(const Info& info);
     void DefineBuffers(const Info& info);
     void DefineImagesAndSamplers(const Info& info);
+    void DefineSharedMemory(const Info& info);
 
     SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
};
@@ -22,16 +22,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
     const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
     const IR::VectorReg dst_reg{inst.dst[0].code};
     if (is_pair) {
+        // Pair loads are either 32 or 64-bit. We assume 32-bit for now.
+        ASSERT(bit_size == 32);
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
-        ir.SetVectorReg(dst_reg, ir.ReadShared(32, is_signed, addr0));
+        ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
-        ir.SetVectorReg(dst_reg + 1, ir.ReadShared(32, is_signed, addr1));
+        ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
     } else if (bit_size == 64) {
-        const IR::Value data = ir.UnpackUint2x32(ir.ReadShared(bit_size, is_signed, addr));
+        const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
         ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
         ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
     } else {
-        const IR::U32 data = ir.ReadShared(bit_size, is_signed, addr);
+        const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)};
         ir.SetVectorReg(dst_reg, data);
     }
 }
@@ -41,17 +43,22 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
     const IR::VectorReg data0{inst.src[1].code};
     const IR::VectorReg data1{inst.src[2].code};
     if (is_pair) {
+        ASSERT(bit_size == 32);
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
         ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
         ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
     } else if (bit_size == 64) {
-        const IR::U64 data = ir.PackUint2x32(
-            ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)));
+        const IR::Value data = ir.CompositeConstruct(ir.GetVectorReg(data0),
+                                                     ir.GetVectorReg(data0 + 1));
         ir.WriteShared(bit_size, data, addr);
     } else {
         ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr);
     }
 }
 
+void Translator::S_BARRIER() {
+    ir.Barrier();
+}
+
 } // namespace Shader::Gcn
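Note: a paired DS access decomposes into two independent 32-bit operations at addr + offset0 and addr + offset1, targeting consecutive VGPRs. A CPU sketch of the read case (illustrative only; LDS is modeled as a u32 array indexed by byte offset, and the helper names are mine):

#include <cstdint>

void ReadPair(std::uint32_t* vgpr, std::uint32_t dst_reg, const std::uint32_t* lds,
              std::uint32_t addr, std::uint32_t offset0, std::uint32_t offset1) {
    vgpr[dst_reg]     = lds[(addr + offset0) >> 2]; // LoadShared(32, ...) at addr0
    vgpr[dst_reg + 1] = lds[(addr + offset1) >> 2]; // LoadShared(32, ...) at addr1
}

Here offset0/offset1 come from the instruction's DS control word (inst.control.ds).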
@@ -466,6 +466,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::S_CMP_LT_U32:
             translator.S_CMP(ConditionOp::LT, false, inst);
             break;
+        case Opcode::S_CMP_LE_U32:
+            translator.S_CMP(ConditionOp::LE, false, inst);
+            break;
         case Opcode::S_CMP_LG_U32:
             translator.S_CMP(ConditionOp::LG, false, inst);
             break;
@@ -816,9 +819,24 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::IMAGE_GET_RESINFO:
            translator.IMAGE_GET_RESINFO(inst);
             break;
+        case Opcode::S_BARRIER:
+            translator.S_BARRIER();
+            break;
         case Opcode::S_TTRACEDATA:
             LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
             break;
+        case Opcode::DS_READ_B32:
+            translator.DS_READ(32, false, false, inst);
+            break;
+        case Opcode::DS_READ2_B32:
+            translator.DS_READ(32, false, true, inst);
+            break;
+        case Opcode::DS_WRITE_B32:
+            translator.DS_WRITE(32, false, false, inst);
+            break;
+        case Opcode::DS_WRITE2_B32:
+            translator.DS_WRITE(32, false, true, inst);
+            break;
         case Opcode::S_NOP:
         case Opcode::S_CBRANCH_EXECZ:
         case Opcode::S_CBRANCH_SCC0:
@@ -165,6 +165,7 @@ public:
     void DS_SWIZZLE_B32(const GcnInst& inst);
     void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
     void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
+    void S_BARRIER();
 
     // MIMG
     void IMAGE_GET_RESINFO(const GcnInst& inst);
@@ -115,6 +115,18 @@ void IREmitter::Discard() {
     Inst(Opcode::Discard);
 }
 
+void IREmitter::Barrier() {
+    Inst(Opcode::Barrier);
+}
+
+void IREmitter::WorkgroupMemoryBarrier() {
+    Inst(Opcode::WorkgroupMemoryBarrier);
+}
+
+void IREmitter::DeviceMemoryBarrier() {
+    Inst(Opcode::DeviceMemoryBarrier);
+}
+
 U32 IREmitter::GetUserData(IR::ScalarReg reg) {
     return Inst<U32>(Opcode::GetUserData, reg);
 }
@@ -240,22 +252,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
     Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
 }
 
-U32U64 IREmitter::ReadShared(int bit_size, bool is_signed, const U32& offset) {
-    /*switch (bit_size) {
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
+    switch (bit_size) {
     case 8:
-        return Inst<U32>(is_signed ? Opcode::ReadSharedS8 : Opcode::ReadSharedU8, offset);
+        return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
     case 16:
-        return Inst<U32>(is_signed ? Opcode::ReadSharedS16 : Opcode::ReadSharedU16, offset);
+        return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
     case 32:
-        return Inst<U32>(Opcode::ReadSharedU32, offset);
+        return Inst<U32>(Opcode::LoadSharedU32, offset);
     case 64:
-        return Inst<U64>(Opcode::ReadSharedU64, offset);
+        return Inst<U64>(Opcode::LoadSharedU64, offset);
+    case 128:
+        return Inst(Opcode::LoadSharedU128, offset);
     }
-    UNREACHABLE_MSG("Invalid bit size {}", bit_size);*/
+    UNREACHABLE_MSG("Invalid bit size {}", bit_size);
 }
 
 void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
-    /*switch (bit_size) {
+    switch (bit_size) {
     case 8:
         Inst(Opcode::WriteSharedU8, offset, value);
         break;
@@ -268,9 +282,12 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
     case 64:
         Inst(Opcode::WriteSharedU64, offset, value);
         break;
+    case 128:
+        Inst(Opcode::WriteSharedU128, offset, value);
+        break;
     default:
-        UNREACHABLE_MSG("Invalid bit size {}", bit_size);
-    }*/
+        throw InvalidArgument("Invalid bit size {}", bit_size);
+    }
 }
 
 U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
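Note: how a translator would drive this API, as a sketch (assumes the project's IREmitter and IR value types from the hunks above; this is not code from the commit). A 64-bit store takes a two-component composite, matching the WriteSharedU64 row (Void, U32, U32x2) in the opcode table further down:

// Sketch only: pack two dwords, store 64 bits, then load and unpack them.
void StoreAndReloadQword(Shader::IR::IREmitter& ir, const Shader::IR::U32& offset,
                         const Shader::IR::U32& lo, const Shader::IR::U32& hi) {
    ir.WriteShared(64, ir.CompositeConstruct(lo, hi), offset);
    const Shader::IR::Value pair = ir.LoadShared(64, false, offset);
    const Shader::IR::U32 lo_out{ir.CompositeExtract(pair, 0)};
    const Shader::IR::U32 hi_out{ir.CompositeExtract(pair, 1)};
}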
@@ -43,6 +43,10 @@ public:
     void Epilogue();
     void Discard();
 
+    void Barrier();
+    void WorkgroupMemoryBarrier();
+    void DeviceMemoryBarrier();
+
     [[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
     [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
     void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);

@@ -74,7 +78,7 @@ public:
     [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
     void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
 
-    [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
+    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
     void WriteShared(int bit_size, const Value& value, const U32& offset);
 
     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
@@ -19,6 +19,25 @@ OPCODE(ReadConst, U32, U32x
 OPCODE(ReadConstBuffer,       F32,    Opaque, U32, )
 OPCODE(ReadConstBufferU32,    U32,    Opaque, U32, )
 
+// Barriers
+OPCODE(Barrier,               Void,   )
+OPCODE(WorkgroupMemoryBarrier, Void,  )
+OPCODE(DeviceMemoryBarrier,   Void,   )
+
+// Shared memory operations
+OPCODE(LoadSharedU8,          U32,    U32, )
+OPCODE(LoadSharedS8,          U32,    U32, )
+OPCODE(LoadSharedU16,         U32,    U32, )
+OPCODE(LoadSharedS16,         U32,    U32, )
+OPCODE(LoadSharedU32,         U32,    U32, )
+OPCODE(LoadSharedU64,         U32x2,  U32, )
+OPCODE(LoadSharedU128,        U32x4,  U32, )
+OPCODE(WriteSharedU8,         Void,   U32, U32, )
+OPCODE(WriteSharedU16,        Void,   U32, U32, )
+OPCODE(WriteSharedU32,        Void,   U32, U32, )
+OPCODE(WriteSharedU64,        Void,   U32, U32x2, )
+OPCODE(WriteSharedU128,       Void,   U32, U32x4, )
+
 // Context getters/setters
 OPCODE(GetUserData,           U32,    ScalarReg, )
 OPCODE(GetThreadBitScalarReg, U1,     ScalarReg, )
@@ -16,6 +16,16 @@ void Visit(Info& info, IR::Inst& inst) {
         info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
         break;
     }
+    case IR::Opcode::LoadSharedS8:
+    case IR::Opcode::LoadSharedU8:
+    case IR::Opcode::WriteSharedU8:
+        info.uses_shared_u8 = true;
+        break;
+    case IR::Opcode::LoadSharedS16:
+    case IR::Opcode::LoadSharedU16:
+    case IR::Opcode::WriteSharedU16:
+        info.uses_shared_u16 = true;
+        break;
     case IR::Opcode::QuadShuffle:
         info.uses_group_quad = true;
         break;
@@ -26,44 +26,9 @@ struct Profile {
     bool support_fp32_signed_zero_nan_preserve{};
     bool support_fp64_signed_zero_nan_preserve{};
     bool support_explicit_workgroup_layout{};
-    bool support_vote{};
-    bool support_viewport_mask{};
-    bool support_typeless_image_loads{};
-    bool support_derivative_control{};
-    bool support_geometry_shader_passthrough{};
-    bool support_native_ndc{};
-    bool support_scaled_attributes{};
-    bool support_multi_viewport{};
-    bool support_geometry_streams{};
-
-    bool warp_size_potentially_larger_than_guest{};
-
-    bool lower_left_origin_mode{};
-    /// Fragment outputs have to be declared even if they are not written to avoid undefined values.
-    /// See Ori and the Blind Forest's main menu for reference.
-    bool need_declared_frag_colors{};
-    /// Prevents fast math optimizations that may cause inaccuracies
-    bool need_fastmath_off{};
-
-    /// OpFClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
-    /// The Position builtin needs to be wrapped in a struct when used as an input
-    bool has_broken_spirv_position_input{};
-    /// Offset image operands with an unsigned type do not work
-    bool has_broken_unsigned_image_offsets{};
-    /// Signed instructions with unsigned data types are misinterpreted
-    bool has_broken_signed_operations{};
-    /// Float controls break when fp16 is enabled
-    bool has_broken_fp16_float_controls{};
-    /// Ignores SPIR-V ordered vs unordered using GLSL semantics
-    bool ignore_nan_fp_comparisons{};
-
-    /// Maxwell and earlier nVidia architectures have broken robust support
-    bool has_broken_robust{};
-
+    bool lower_left_origin_mode{};
     u64 min_ssbo_alignment{};
-
-    u32 max_user_clip_distances{};
 };
 
 } // namespace Shader
@@ -134,7 +134,10 @@ struct Info {
     std::span<const u32> user_data;
     Stage stage;
 
+    u32 shared_memory_size{};
     bool uses_group_quad{};
+    bool uses_shared_u8{};
+    bool uses_shared_u16{};
     bool translation_failed{}; // indicates that shader has unsupported instructions
 
     template <typename T>
@@ -121,6 +121,7 @@ struct Liverpool {
            BitField<0, 6, u64> num_vgprs;
            BitField<6, 4, u64> num_sgprs;
            BitField<33, 5, u64> num_user_regs;
+            BitField<47, 9, u64> lds_dwords;
        } settings;
        INSERT_PADDING_WORDS(1);
        u32 resource_limits;
@@ -133,6 +134,11 @@ struct Liverpool {
            return reinterpret_cast<const T*>(addr);
        }
 
+        u32 SharedMemSize() const noexcept {
+            // lds_dwords is in units of 128 dwords. We return bytes.
+            return settings.lds_dwords.Value() * 128 * 4;
+        }
+
        std::span<const u32> Code() const {
            const u32* code = Address<u32>();
            BinaryInfo bininfo;
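Note: lds_dwords is a 9-bit count of 128-dword granules, and a dword is 4 bytes, so one granule is 512 bytes. A standalone check of the arithmetic (illustrative only):

constexpr unsigned SharedMemSizeBytes(unsigned lds_dwords) {
    return lds_dwords * 128 * 4;
}
static_assert(SharedMemSizeBytes(1) == 512);      // one granule
static_assert(SharedMemSizeBytes(12) == 6144);    // 12 granules = 6 KiB
static_assert(SharedMemSizeBytes(511) == 261632); // maximum of the 9-bit field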
@@ -45,6 +45,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
        info.num_user_data = cs_pgm.settings.num_user_regs;
        info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
                               cs_pgm.num_thread_z.full};
+        info.shared_memory_size = cs_pgm.SharedMemSize();
        break;
    }
    default:
@@ -60,6 +61,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
     pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
     profile = Shader::Profile{
         .supported_spirv = 0x00010600U,
+        .support_explicit_workgroup_layout = true,
     };
 }
 
@@ -179,19 +179,18 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     case vk::Format::eR8Unorm:
         return vk::Format::eR8Uint;
     case vk::Format::eR8G8Unorm:
+    case vk::Format::eR16Sfloat:
         return vk::Format::eR8G8Uint;
     case vk::Format::eR8G8B8A8Srgb:
-        [[fallthrough]];
     case vk::Format::eB8G8R8A8Srgb:
-        [[fallthrough]];
     case vk::Format::eB8G8R8A8Unorm:
-        [[fallthrough]];
     case vk::Format::eR8G8B8A8Unorm:
+    case vk::Format::eR32Uint:
         return vk::Format::eR32Uint;
     case vk::Format::eBc1RgbaUnormBlock:
+    case vk::Format::eR32G32Sfloat:
         return vk::Format::eR32G32Uint;
     case vk::Format::eBc3SrgbBlock:
-        [[fallthrough]];
     case vk::Format::eBc3UnormBlock:
     case vk::Format::eBc7SrgbBlock:
     case vk::Format::eBc7UnormBlock: