video_core: Use texture buffers for untyped format load/store

This commit is contained in:
IndecisiveTurtle 2024-08-27 14:57:21 +03:00
parent 833a366e3b
commit f118dc7eca
20 changed files with 400 additions and 395 deletions

View File

@ -120,6 +120,7 @@ bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle,
} }
SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) { SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) {
return nullptr;
LOG_TRACE(Lib_AvPlayer, "called"); LOG_TRACE(Lib_AvPlayer, "called");
if (data == nullptr) { if (data == nullptr) {
return nullptr; return nullptr;
@ -325,4 +326,4 @@ void RegisterlibSceAvPlayer(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("yN7Jhuv8g24", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerVprintf); LIB_FUNCTION("yN7Jhuv8g24", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerVprintf);
}; };
} // namespace Libraries::AvPlayer } // namespace Libraries::AvPlayer

View File

@ -1066,7 +1066,16 @@ ScePthread PThreadPool::Create() {
} }
} }
#ifdef _WIN64
auto* ret = new PthreadInternal{}; auto* ret = new PthreadInternal{};
#else
// TODO: Linux specific hack
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
auto* ret = reinterpret_cast<PthreadInternal*>(
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
#endif
ret->is_free = false; ret->is_free = false;
ret->is_detached = false; ret->is_detached = false;
ret->is_almost_done = false; ret->is_almost_done = false;

View File

@ -189,6 +189,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
} }
if (info.has_texel_buffers) {
ctx.AddCapability(spv::Capability::SampledBuffer);
}
switch (program.info.stage) { switch (program.info.stage) {
case Stage::Compute: { case Stage::Compute: {
const std::array<u32, 3> workgroup_size{program.info.workgroup_size}; const std::array<u32, 3> workgroup_size{program.info.workgroup_size};

View File

@ -262,171 +262,15 @@ Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<4>(ctx, handle, address); return EmitLoadBufferF32xN<4>(ctx, handle, address);
} }
static bool IsSignedInteger(AmdGpu::NumberFormat format) {
switch (format) {
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Uscaled:
case AmdGpu::NumberFormat::Uint:
return false;
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::Sscaled:
case AmdGpu::NumberFormat::Sint:
case AmdGpu::NumberFormat::SnormNz:
return true;
case AmdGpu::NumberFormat::Float:
default:
UNREACHABLE();
}
}
static u32 UXBitsMax(u32 bit_width) {
return (1u << bit_width) - 1u;
}
static u32 SXBitsMax(u32 bit_width) {
return (1u << (bit_width - 1u)) - 1u;
}
static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, u32 bit_width) {
switch (format) {
case AmdGpu::NumberFormat::Unorm:
return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))));
case AmdGpu::NumberFormat::Snorm:
return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width))));
case AmdGpu::NumberFormat::SnormNz:
// (x * 2 + 1) / (Format::SMAX * 2)
value = ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(2.f));
value = ctx.OpFAdd(ctx.F32[1], value, ctx.ConstF32(1.f));
return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width) * 2)));
case AmdGpu::NumberFormat::Uscaled:
case AmdGpu::NumberFormat::Sscaled:
case AmdGpu::NumberFormat::Uint:
case AmdGpu::NumberFormat::Sint:
case AmdGpu::NumberFormat::Float:
return value;
default:
UNREACHABLE_MSG("Unsupported number format for conversion: {}",
magic_enum::enum_name(format));
}
}
static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offset) {
Id comp_offset = ctx.ConstU32(bit_offset);
if (stride < 4) {
// comp_offset += (address % 4) * 8;
const Id byte_offset = ctx.OpUMod(ctx.U32[1], address, ctx.ConstU32(4u));
const Id bit_offset = ctx.OpShiftLeftLogical(ctx.U32[1], byte_offset, ctx.ConstU32(3u));
comp_offset = ctx.OpIAdd(ctx.U32[1], comp_offset, bit_offset);
}
return comp_offset;
}
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
auto& buffer = ctx.buffers[handle];
const auto format = buffer.dfmt;
switch (format) {
case AmdGpu::DataFormat::FormatInvalid:
return ctx.f32_zero_value;
case AmdGpu::DataFormat::Format8:
case AmdGpu::DataFormat::Format16:
case AmdGpu::DataFormat::Format32:
case AmdGpu::DataFormat::Format8_8:
case AmdGpu::DataFormat::Format16_16:
case AmdGpu::DataFormat::Format10_11_11:
case AmdGpu::DataFormat::Format11_11_10:
case AmdGpu::DataFormat::Format10_10_10_2:
case AmdGpu::DataFormat::Format2_10_10_10:
case AmdGpu::DataFormat::Format8_8_8_8:
case AmdGpu::DataFormat::Format32_32:
case AmdGpu::DataFormat::Format16_16_16_16:
case AmdGpu::DataFormat::Format32_32_32:
case AmdGpu::DataFormat::Format32_32_32_32: {
const u32 num_components = AmdGpu::NumComponents(format);
if (comp >= num_components) {
return ctx.f32_zero_value;
}
// uint index = address / 4;
Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const u32 stride = buffer.stride;
if (stride > 4) {
const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32);
if (index_offset > 0) {
// index += index_offset;
index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(index_offset));
}
}
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32;
const u32 bit_width = AmdGpu::ComponentBits(format, comp);
const auto num_format = buffer.nfmt;
if (num_format == AmdGpu::NumberFormat::Float) {
if (bit_width == 32) {
return ctx.OpLoad(ctx.F32[1], ptr);
} else if (bit_width == 16) {
const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
Id value = ctx.OpLoad(ctx.U32[1], ptr);
value =
ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width));
value = ctx.OpSConvert(ctx.U16, value);
value = ctx.OpBitcast(ctx.F16[1], value);
return ctx.OpFConvert(ctx.F32[1], value);
} else {
UNREACHABLE_MSG("Invalid float bit width {}", bit_width);
}
} else {
Id value = ctx.OpLoad(ctx.U32[1], ptr);
const bool is_signed = IsSignedInteger(num_format);
if (bit_width < 32) {
const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
if (is_signed) {
value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
ctx.ConstU32(bit_width));
} else {
value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
ctx.ConstU32(bit_width));
}
}
value = ctx.OpBitcast(ctx.F32[1], value);
return ConvertValue(ctx, value, num_format, bit_width);
}
break;
}
default:
UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
}
}
template <u32 N>
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if constexpr (N == 1) {
return GetBufferFormatValue(ctx, handle, address, 0);
} else {
boost::container::static_vector<Id, N> ids;
for (u32 i = 0; i < N; i++) {
ids.push_back(GetBufferFormatValue(ctx, handle, address, i));
}
return ctx.OpCompositeConstruct(ctx.F32[N], ids);
}
}
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferFormatF32xN<1>(ctx, inst, handle, address); const auto& buffer = ctx.texture_buffers[handle];
} const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id texel = ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
return EmitLoadBufferFormatF32xN<2>(ctx, inst, handle, address); if (buffer.is_integer) {
} texel = ctx.OpBitcast(ctx.F32[4], texel);
}
Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { return texel;
return EmitLoadBufferFormatF32xN<3>(ctx, inst, handle, address);
}
Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address);
} }
template <u32 N> template <u32 N>
@ -467,6 +311,7 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
EmitStoreBufferF32xN<1>(ctx, handle, address, value); EmitStoreBufferF32xN<1>(ctx, handle, address, value);
} }
<<<<<<< HEAD
static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
u32 bit_width) { u32 bit_width) {
switch (format) { switch (format) {
@ -541,23 +386,16 @@ static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address,
} }
} }
=======
>>>>>>> 8b824588 (video_core: Use texture buffers for untyped format load/store)
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value); const auto& buffer = ctx.texture_buffers[handle];
} const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, if (buffer.is_integer) {
Id value) { value = ctx.OpBitcast(ctx.U32[4], value);
EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value); }
} ctx.OpImageWrite(tex_buffer, coord, value);
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value);
}
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value);
} }
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -48,6 +48,7 @@ EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32
DefineArithmeticTypes(); DefineArithmeticTypes();
DefineInterfaces(); DefineInterfaces();
DefineBuffers(); DefineBuffers();
DefineTextureBuffers();
DefineImagesAndSamplers(); DefineImagesAndSamplers();
DefineSharedMemory(); DefineSharedMemory();
} }
@ -123,21 +124,19 @@ void EmitContext::DefineInterfaces() {
DefineOutputs(); DefineOutputs();
} }
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) { switch (fmt) {
case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm: case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz: case AmdGpu::NumberFormat::SnormNz:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
case AmdGpu::NumberFormat::Uint:
return ctx.U32[4];
case AmdGpu::NumberFormat::Sscaled: case AmdGpu::NumberFormat::Sscaled:
return ctx.F32[4];
case AmdGpu::NumberFormat::Uscaled: case AmdGpu::NumberFormat::Uscaled:
return ctx.F32[4]; return ctx.F32;
case AmdGpu::NumberFormat::Sint:
return ctx.S32;
case AmdGpu::NumberFormat::Uint:
return ctx.U32;
default: default:
break; break;
} }
@ -177,6 +176,16 @@ void EmitContext::DefineBufferOffsets() {
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
} }
for (auto& tex_buffer : texture_buffers) {
const u32 binding = tex_buffer.binding;
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
}
} }
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
@ -202,7 +211,7 @@ void EmitContext::DefineInputs() {
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) { for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)}; const Id type{GetAttributeType(*this, input.fmt)[4]};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) { input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
@ -328,27 +337,30 @@ void EmitContext::DefinePushDataBlock() {
void EmitContext::DefineBuffers() { void EmitContext::DefineBuffers() {
boost::container::small_vector<Id, 8> type_ids; boost::container::small_vector<Id, 8> type_ids;
for (u32 i = 0; const auto& buffer : info.buffers) { const auto define_struct = [&](Id record_array_type, bool is_instance_data) {
const auto sharp = buffer.GetVsharp(info); const Id struct_type{TypeStruct(record_array_type)};
const bool is_storage = buffer.IsStorage(sharp); if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; return struct_type;
}
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
: fmt::format("{}_cbuf_block_f32", stage);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
type_ids.push_back(record_array_type);
return struct_type;
};
for (const auto& desc : info.buffers) {
const auto sharp = desc.GetVsharp(info);
const bool is_storage = desc.IsStorage(sharp);
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
const Id data_type = (*data_types)[1]; const Id data_type = (*data_types)[1];
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type) const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
: TypeArray(data_type, ConstU32(buffer.length))}; : TypeArray(data_type, ConstU32(desc.length))};
const Id struct_type{TypeStruct(record_array_type)}; const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name =
buffer.is_instance_data
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
sizeof(float) * CHAR_BIT)
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
type_ids.push_back(record_array_type);
}
const auto storage_class = const auto storage_class =
is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform; is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
@ -357,19 +369,39 @@ void EmitContext::DefineBuffers() {
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)}; const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::DescriptorSet, 0U); Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", buffer.sgpr_base)); Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
buffers.push_back({ buffers.push_back({
.id = id, .id = id,
.binding = binding++, .binding = binding++,
.data_types = data_types, .data_types = data_types,
.pointer_type = pointer_type, .pointer_type = pointer_type,
.dfmt = buffer.dfmt,
.nfmt = buffer.nfmt,
.stride = buffer.GetVsharp(info).GetStride(),
}); });
interfaces.push_back(id); interfaces.push_back(id);
i++; }
}
void EmitContext::DefineTextureBuffers() {
for (const auto& desc : info.texture_buffers) {
const bool is_integer =
desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint;
const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)};
const u32 sampled = desc.is_written ? 2 : 1;
const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
sampled, spv::ImageFormat::Unknown)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
texture_buffers.push_back({
.id = id,
.binding = binding++,
.image_type = image_type,
.result_type = sampled_type[4],
.is_integer = is_integer,
});
interfaces.push_back(id);
} }
} }

View File

@ -207,13 +207,19 @@ public:
u32 binding; u32 binding;
const VectorIds* data_types; const VectorIds* data_types;
Id pointer_type; Id pointer_type;
AmdGpu::DataFormat dfmt; };
AmdGpu::NumberFormat nfmt; struct TextureBufferDefinition {
u32 stride; Id id;
Id coord_offset;
u32 binding;
Id image_type;
Id result_type;
bool is_integer;
}; };
u32& binding; u32& binding;
boost::container::small_vector<BufferDefinition, 16> buffers; boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureBufferDefinition, 8> texture_buffers;
boost::container::small_vector<TextureDefinition, 8> images; boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<Id, 4> samplers; boost::container::small_vector<Id, 4> samplers;
@ -238,6 +244,7 @@ private:
void DefineOutputs(); void DefineOutputs();
void DefinePushDataBlock(); void DefinePushDataBlock();
void DefineBuffers(); void DefineBuffers();
void DefineTextureBuffers();
void DefineImagesAndSamplers(); void DefineImagesAndSamplers();
void DefineSharedMemory(); void DefineSharedMemory();

View File

@ -191,8 +191,10 @@ public:
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
// Vector Memory // Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst);
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst);
void BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst);
void BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst);
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
// Vector interpolation // Vector interpolation

View File

@ -56,57 +56,57 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
// Buffer load operations // Buffer load operations
case Opcode::TBUFFER_LOAD_FORMAT_X: case Opcode::TBUFFER_LOAD_FORMAT_X:
return BUFFER_LOAD_FORMAT(1, true, true, inst); return BUFFER_LOAD(1, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XY: case Opcode::TBUFFER_LOAD_FORMAT_XY:
return BUFFER_LOAD_FORMAT(2, true, true, inst); return BUFFER_LOAD(2, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XYZ: case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
return BUFFER_LOAD_FORMAT(3, true, true, inst); return BUFFER_LOAD(3, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XYZW: case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
return BUFFER_LOAD_FORMAT(4, true, true, inst); return BUFFER_LOAD(4, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_X: case Opcode::BUFFER_LOAD_FORMAT_X:
return BUFFER_LOAD_FORMAT(1, false, true, inst); return BUFFER_LOAD_FORMAT(1, inst);
case Opcode::BUFFER_LOAD_FORMAT_XY: case Opcode::BUFFER_LOAD_FORMAT_XY:
return BUFFER_LOAD_FORMAT(2, false, true, inst); return BUFFER_LOAD_FORMAT(2, inst);
case Opcode::BUFFER_LOAD_FORMAT_XYZ: case Opcode::BUFFER_LOAD_FORMAT_XYZ:
return BUFFER_LOAD_FORMAT(3, false, true, inst); return BUFFER_LOAD_FORMAT(3, inst);
case Opcode::BUFFER_LOAD_FORMAT_XYZW: case Opcode::BUFFER_LOAD_FORMAT_XYZW:
return BUFFER_LOAD_FORMAT(4, false, true, inst); return BUFFER_LOAD_FORMAT(4, inst);
case Opcode::BUFFER_LOAD_DWORD: case Opcode::BUFFER_LOAD_DWORD:
return BUFFER_LOAD_FORMAT(1, false, false, inst); return BUFFER_LOAD(1, false, inst);
case Opcode::BUFFER_LOAD_DWORDX2: case Opcode::BUFFER_LOAD_DWORDX2:
return BUFFER_LOAD_FORMAT(2, false, false, inst); return BUFFER_LOAD(2, false, inst);
case Opcode::BUFFER_LOAD_DWORDX3: case Opcode::BUFFER_LOAD_DWORDX3:
return BUFFER_LOAD_FORMAT(3, false, false, inst); return BUFFER_LOAD(3, false, inst);
case Opcode::BUFFER_LOAD_DWORDX4: case Opcode::BUFFER_LOAD_DWORDX4:
return BUFFER_LOAD_FORMAT(4, false, false, inst); return BUFFER_LOAD(4, false, inst);
// Buffer store operations // Buffer store operations
case Opcode::BUFFER_STORE_FORMAT_X: case Opcode::BUFFER_STORE_FORMAT_X:
return BUFFER_STORE_FORMAT(1, false, true, inst); return BUFFER_STORE_FORMAT(1, inst);
case Opcode::BUFFER_STORE_FORMAT_XY: case Opcode::BUFFER_STORE_FORMAT_XY:
return BUFFER_STORE_FORMAT(2, false, true, inst); return BUFFER_STORE_FORMAT(2, inst);
case Opcode::BUFFER_STORE_FORMAT_XYZ: case Opcode::BUFFER_STORE_FORMAT_XYZ:
return BUFFER_STORE_FORMAT(3, false, true, inst); return BUFFER_STORE_FORMAT(3, inst);
case Opcode::BUFFER_STORE_FORMAT_XYZW: case Opcode::BUFFER_STORE_FORMAT_XYZW:
return BUFFER_STORE_FORMAT(4, false, true, inst); return BUFFER_STORE_FORMAT(4, inst);
case Opcode::TBUFFER_STORE_FORMAT_X: case Opcode::TBUFFER_STORE_FORMAT_X:
return BUFFER_STORE_FORMAT(1, true, true, inst); return BUFFER_STORE(1, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XY: case Opcode::TBUFFER_STORE_FORMAT_XY:
return BUFFER_STORE_FORMAT(2, true, true, inst); return BUFFER_STORE(2, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XYZ: case Opcode::TBUFFER_STORE_FORMAT_XYZ:
return BUFFER_STORE_FORMAT(3, true, true, inst); return BUFFER_STORE(3, true, inst);
case Opcode::BUFFER_STORE_DWORD: case Opcode::BUFFER_STORE_DWORD:
return BUFFER_STORE_FORMAT(1, false, false, inst); return BUFFER_STORE(1, false, inst);
case Opcode::BUFFER_STORE_DWORDX2: case Opcode::BUFFER_STORE_DWORDX2:
return BUFFER_STORE_FORMAT(2, false, false, inst); return BUFFER_STORE(2, false, inst);
case Opcode::BUFFER_STORE_DWORDX3: case Opcode::BUFFER_STORE_DWORDX3:
return BUFFER_STORE_FORMAT(3, false, false, inst); return BUFFER_STORE(3, false, inst);
case Opcode::BUFFER_STORE_DWORDX4: case Opcode::BUFFER_STORE_DWORDX4:
return BUFFER_STORE_FORMAT(4, false, false, inst); return BUFFER_STORE(4, false, inst);
// Buffer atomic operations // Buffer atomic operations
case Opcode::BUFFER_ATOMIC_ADD: case Opcode::BUFFER_ATOMIC_ADD:
@ -349,8 +349,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
ir.ImageWrite(handle, body, value, {}); ir.ImageWrite(handle, body, value, {});
} }
void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
const GcnInst& inst) {
const auto& mtbuf = inst.control.mtbuf; const auto& mtbuf = inst.control.mtbuf;
const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::ScalarReg sharp{inst.src[2].code * 4};
@ -370,22 +369,19 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
info.index_enable.Assign(mtbuf.idxen); info.index_enable.Assign(mtbuf.idxen);
info.offset_enable.Assign(mtbuf.offen); info.offset_enable.Assign(mtbuf.offen);
info.inst_offset.Assign(mtbuf.offset); info.inst_offset.Assign(mtbuf.offset);
info.is_typed.Assign(is_typed);
if (is_typed) { if (is_typed) {
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt)); const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt)); const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
ASSERT(info.nfmt == AmdGpu::NumberFormat::Float && ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
(info.dmft == AmdGpu::DataFormat::Format32_32_32_32 || (dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
info.dmft == AmdGpu::DataFormat::Format32_32_32 || dmft == AmdGpu::DataFormat::Format32_32_32 ||
info.dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
info.dmft == AmdGpu::DataFormat::Format32));
} }
const IR::Value handle = const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = is_format ? ir.LoadBufferFormat(num_dwords, handle, address, info) const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
: ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code}; const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) { if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::F32{value}); ir.SetVectorReg(dst_reg, IR::F32{value});
@ -396,8 +392,34 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
} }
} }
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
const GcnInst& inst) { const auto& mubuf = inst.control.mubuf;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
const IR::Value address = [&] -> IR::Value {
if (mubuf.idxen) {
return ir.GetVectorReg(vaddr);
}
return {};
}();
const IR::Value soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
IR::BufferInstInfo info{};
info.index_enable.Assign(mubuf.idxen);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBufferFormat(handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
}
}
void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
const auto& mtbuf = inst.control.mtbuf; const auto& mtbuf = inst.control.mtbuf;
const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::ScalarReg sharp{inst.src[2].code * 4};
@ -417,45 +439,76 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form
info.index_enable.Assign(mtbuf.idxen); info.index_enable.Assign(mtbuf.idxen);
info.offset_enable.Assign(mtbuf.offen); info.offset_enable.Assign(mtbuf.offen);
info.inst_offset.Assign(mtbuf.offset); info.inst_offset.Assign(mtbuf.offset);
info.is_typed.Assign(is_typed);
if (is_typed) { if (is_typed) {
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt)); const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt)); const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
(dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
dmft == AmdGpu::DataFormat::Format32_32_32 ||
dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32));
} }
IR::Value value{}; IR::Value value{};
const IR::VectorReg src_reg{inst.src[1].code}; const IR::VectorReg src_reg{inst.src[1].code};
switch (num_dwords) { switch (num_dwords) {
case 1: case 1:
value = ir.GetVectorReg<Shader::IR::F32>(src_reg); value = ir.GetVectorReg<IR::F32>(src_reg);
break; break;
case 2: case 2:
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg), value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1)); ir.GetVectorReg<IR::F32>(src_reg + 1));
break; break;
case 3: case 3:
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg), value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1), ir.GetVectorReg<IR::F32>(src_reg + 1),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 2)); ir.GetVectorReg<IR::F32>(src_reg + 2));
break; break;
case 4: case 4:
value = ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::F32>(src_reg), value = ir.CompositeConstruct(
ir.GetVectorReg<Shader::IR::F32>(src_reg + 1), ir.GetVectorReg<IR::F32>(src_reg), ir.GetVectorReg<IR::F32>(src_reg + 1),
ir.GetVectorReg<Shader::IR::F32>(src_reg + 2), ir.GetVectorReg<IR::F32>(src_reg + 2), ir.GetVectorReg<IR::F32>(src_reg + 3));
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
break; break;
} }
const IR::Value handle = const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
if (is_format) { ir.StoreBuffer(num_dwords, handle, address, value, info);
ir.StoreBufferFormat(num_dwords, handle, address, value, info); }
} else {
ir.StoreBuffer(num_dwords, handle, address, value, info); void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
} const auto& mubuf = inst.control.mubuf;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
const IR::Value address = [&] -> IR::Value {
if (mubuf.idxen) {
return ir.GetVectorReg(vaddr);
}
return {};
}();
const IR::Value soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
IR::BufferInstInfo info{};
info.index_enable.Assign(mubuf.idxen);
const IR::VectorReg src_reg{inst.src[1].code};
std::array<IR::Value, 4> comps{};
for (u32 i = 0; i < num_dwords; i++) {
comps[i] = ir.GetVectorReg<IR::F32>(src_reg + i);
}
for (u32 i = num_dwords; i < 4; i++) {
comps[i] = ir.Imm32(0.f);
}
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBufferFormat(handle, address, value, info);
} }
// TODO: U64
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mubuf = inst.control.mubuf; const auto& mubuf = inst.control.mubuf;
const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vaddr{inst.src[0].code};

View File

@ -325,20 +325,8 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad
} }
} }
Value IREmitter::LoadBufferFormat(int num_dwords, const Value& handle, const Value& address, Value IREmitter::LoadBufferFormat(const Value& handle, const Value& address, BufferInstInfo info) {
BufferInstInfo info) { return Inst(Opcode::LoadBufferFormatF32, Flags{info}, handle, address);
switch (num_dwords) {
case 1:
return Inst(Opcode::LoadBufferFormatF32, Flags{info}, handle, address);
case 2:
return Inst(Opcode::LoadBufferFormatF32x2, Flags{info}, handle, address);
case 3:
return Inst(Opcode::LoadBufferFormatF32x3, Flags{info}, handle, address);
case 4:
return Inst(Opcode::LoadBufferFormatF32x4, Flags{info}, handle, address);
default:
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
}
} }
void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address, void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address,
@ -409,24 +397,9 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value); return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
} }
void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
const Value& data, BufferInstInfo info) { BufferInstInfo info) {
switch (num_dwords) { Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
case 1:
Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
break;
case 2:
Inst(Opcode::StoreBufferFormatF32x2, Flags{info}, handle, address, data);
break;
case 3:
Inst(Opcode::StoreBufferFormatF32x3, Flags{info}, handle, address, data);
break;
case 4:
Inst(Opcode::StoreBufferFormatF32x4, Flags{info}, handle, address, data);
break;
default:
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
}
} }
U32 IREmitter::LaneId() { U32 IREmitter::LaneId() {

View File

@ -92,12 +92,12 @@ public:
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
BufferInstInfo info); BufferInstInfo info);
[[nodiscard]] Value LoadBufferFormat(int num_dwords, const Value& handle, const Value& address, [[nodiscard]] Value LoadBufferFormat(const Value& handle, const Value& address,
BufferInstInfo info); BufferInstInfo info);
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
BufferInstInfo info); BufferInstInfo info);
void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, void StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
const Value& data, BufferInstInfo info); BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info); const Value& value, BufferInstInfo info);

View File

@ -56,9 +56,6 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferF32x3: case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4: case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferFormatF32: case Opcode::StoreBufferFormatF32:
case Opcode::StoreBufferFormatF32x2:
case Opcode::StoreBufferFormatF32x3:
case Opcode::StoreBufferFormatF32x4:
case Opcode::StoreBufferU32: case Opcode::StoreBufferU32:
case Opcode::BufferAtomicIAdd32: case Opcode::BufferAtomicIAdd32:
case Opcode::BufferAtomicSMin32: case Opcode::BufferAtomicSMin32:

View File

@ -79,19 +79,13 @@ OPCODE(LoadBufferF32, F32, Opaq
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, ) OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, ) OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, ) OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32, F32, Opaque, Opaque, ) OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32x2, F32x2, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32x3, F32x3, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32x4, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferU32, U32, Opaque, Opaque, ) OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, ) OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, )
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32, ) OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferFormatF32x2, Void, Opaque, Opaque, F32x2, )
OPCODE(StoreBufferFormatF32x3, Void, Opaque, Opaque, F32x3, )
OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
// Buffer atomic operations // Buffer atomic operations

View File

@ -45,10 +45,6 @@ bool IsBufferStore(const IR::Inst& inst) {
case IR::Opcode::StoreBufferF32x2: case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3: case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4: case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32: case IR::Opcode::StoreBufferU32:
return true; return true;
default: default:
@ -62,10 +58,6 @@ bool IsBufferInstruction(const IR::Inst& inst) {
case IR::Opcode::LoadBufferF32x2: case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3: case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4: case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::LoadBufferFormatF32x2:
case IR::Opcode::LoadBufferFormatF32x3:
case IR::Opcode::LoadBufferFormatF32x4:
case IR::Opcode::LoadBufferU32: case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBuffer:
case IR::Opcode::ReadConstBufferU32: case IR::Opcode::ReadConstBufferU32:
@ -75,6 +67,11 @@ bool IsBufferInstruction(const IR::Inst& inst) {
} }
} }
bool IsTextureBufferInstruction(const IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
}
static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
switch (num_format) { switch (num_format) {
case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Float:
@ -100,28 +97,6 @@ static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_for
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::LoadBufferFormatF32x2:
case IR::Opcode::LoadBufferFormatF32x3:
case IR::Opcode::LoadBufferFormatF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
switch (num_format) {
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::Uscaled:
case AmdGpu::NumberFormat::Sscaled:
case AmdGpu::NumberFormat::Uint:
case AmdGpu::NumberFormat::Sint:
case AmdGpu::NumberFormat::SnormNz:
return IR::Type::U32;
case AmdGpu::NumberFormat::Float:
return IR::Type::F32;
default:
UNREACHABLE();
}
case IR::Opcode::LoadBufferF32: case IR::Opcode::LoadBufferF32:
case IR::Opcode::LoadBufferF32x2: case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3: case IR::Opcode::LoadBufferF32x3:
@ -209,7 +184,8 @@ u32 ImageOffsetArgumentPosition(const IR::Inst& inst) {
class Descriptors { class Descriptors {
public: public:
explicit Descriptors(Info& info_) explicit Descriptors(Info& info_)
: info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images}, : info{info_}, buffer_resources{info_.buffers},
texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images},
sampler_resources{info_.samplers} {} sampler_resources{info_.samplers} {}
u32 Add(const BufferResource& desc) { u32 Add(const BufferResource& desc) {
@ -225,6 +201,16 @@ public:
return index; return index;
} }
u32 Add(const TextureBufferResource& desc) {
const u32 index{Add(texture_buffer_resources, desc, [&desc](const auto& existing) {
return desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset;
})};
auto& buffer = texture_buffer_resources[index];
buffer.is_written |= desc.is_written;
return index;
}
u32 Add(const ImageResource& desc) { u32 Add(const ImageResource& desc) {
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
return desc.sgpr_base == existing.sgpr_base && return desc.sgpr_base == existing.sgpr_base &&
@ -259,6 +245,7 @@ private:
const Info& info; const Info& info;
BufferResourceList& buffer_resources; BufferResourceList& buffer_resources;
TextureBufferResourceList& texture_buffer_resources;
ImageResourceList& image_resources; ImageResourceList& image_resources;
SamplerResourceList& sampler_resources; SamplerResourceList& sampler_resources;
}; };
@ -355,20 +342,6 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
}; };
} }
static constexpr size_t MaxUboSize = 65536;
static bool IsLoadBufferFormat(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::LoadBufferFormatF32x2:
case IR::Opcode::LoadBufferFormatF32x3:
case IR::Opcode::LoadBufferFormatF32x4:
return true;
default:
return false;
}
}
static u32 BufferLength(const AmdGpu::Buffer& buffer) { static u32 BufferLength(const AmdGpu::Buffer& buffer) {
const auto stride = buffer.GetStride(); const auto stride = buffer.GetStride();
if (stride < sizeof(f32)) { if (stride < sizeof(f32)) {
@ -434,14 +407,6 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
// Update buffer descriptor format. // Update buffer descriptor format.
const auto inst_info = inst.Flags<IR::BufferInstInfo>(); const auto inst_info = inst.Flags<IR::BufferInstInfo>();
auto& buffer_desc = info.buffers[binding];
if (inst_info.is_typed) {
buffer_desc.dfmt = inst_info.dmft;
buffer_desc.nfmt = inst_info.nfmt;
} else {
buffer_desc.dfmt = buffer.GetDataFmt();
buffer_desc.nfmt = buffer.GetNumberFmt();
}
// Replace handle with binding index in buffer resource list. // Replace handle with binding index in buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@ -454,20 +419,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
return; return;
} }
if (IsLoadBufferFormat(inst)) {
if (UseFP16(buffer.GetDataFmt(), buffer.GetNumberFmt())) {
info.uses_fp16 = true;
}
} else {
const u32 stride = buffer.GetStride();
if (stride < 4) {
LOG_WARNING(Render_Vulkan,
"non-formatting load_buffer_* is not implemented for stride {}", stride);
}
}
// Compute address of the buffer using the stride. // Compute address of the buffer using the stride.
// Todo: What if buffer is rebound with different stride?
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value()); IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
if (inst_info.index_enable) { if (inst_info.index_enable) {
const IR::U32 index = inst_info.offset_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 0)} const IR::U32 index = inst_info.offset_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 0)}
@ -482,6 +434,25 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
inst.SetArg(1, address); inst.SetArg(1, address);
} }
void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) {
const IR::Inst* handle = inst.Arg(0).InstRecursive();
const IR::Inst* producer = handle->Arg(0).InstRecursive();
const auto sharp = TrackSharp(producer);
const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
const s32 binding = descriptors.Add(TextureBufferResource{
.sgpr_base = sharp.sgpr_base,
.dword_offset = sharp.dword_offset,
.nfmt = buffer.GetNumberFmt(),
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
});
// Replace handle with binding index in texture buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding));
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
}
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
const IR::Value& z, bool is_storage) { const IR::Value& z, bool is_storage) {
// When cubemap is written with imageStore it is treated like 2DArray. // When cubemap is written with imageStore it is treated like 2DArray.
@ -666,6 +637,10 @@ void ResourceTrackingPass(IR::Program& program) {
PatchBufferInstruction(*block, inst, info, descriptors); PatchBufferInstruction(*block, inst, info, descriptors);
continue; continue;
} }
if (IsTextureBufferInstruction(inst)) {
PatchTextureBufferInstruction(*block, inst, info, descriptors);
continue;
}
if (IsImageInstruction(inst)) { if (IsImageInstruction(inst)) {
PatchImageInstruction(*block, inst, info, descriptors); PatchImageInstruction(*block, inst, info, descriptors);
} }

View File

@ -29,6 +29,10 @@ void Visit(Info& info, IR::Inst& inst) {
case IR::Opcode::ImageWrite: case IR::Opcode::ImageWrite:
info.has_storage_images = true; info.has_storage_images = true;
break; break;
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32:
info.has_texel_buffers = true;
break;
case IR::Opcode::QuadShuffle: case IR::Opcode::QuadShuffle:
info.uses_group_quad = true; info.uses_group_quad = true;
break; break;

View File

@ -66,9 +66,6 @@ union BufferInstInfo {
BitField<0, 1, u32> index_enable; BitField<0, 1, u32> index_enable;
BitField<1, 1, u32> offset_enable; BitField<1, 1, u32> offset_enable;
BitField<2, 12, u32> inst_offset; BitField<2, 12, u32> inst_offset;
BitField<14, 4, AmdGpu::DataFormat> dmft;
BitField<18, 3, AmdGpu::NumberFormat> nfmt;
BitField<21, 1, u32> is_typed;
}; };
enum class ScalarReg : u32 { enum class ScalarReg : u32 {

View File

@ -81,8 +81,6 @@ struct BufferResource {
u32 length; u32 length;
IR::Type used_types; IR::Type used_types;
AmdGpu::Buffer inline_cbuf; AmdGpu::Buffer inline_cbuf;
AmdGpu::DataFormat dfmt;
AmdGpu::NumberFormat nfmt;
bool is_instance_data{}; bool is_instance_data{};
bool is_written{}; bool is_written{};
@ -107,6 +105,23 @@ struct BufferResource {
}; };
using BufferResourceList = boost::container::static_vector<BufferResource, 16>; using BufferResourceList = boost::container::static_vector<BufferResource, 16>;
struct TextureBufferResource {
u32 sgpr_base;
u32 dword_offset;
AmdGpu::NumberFormat nfmt;
bool is_written{};
u64 GetKey(const Info& info) const {
const auto sharp = GetVsharp(info);
const bool is_integer = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Uint ||
sharp.GetNumberFmt() == AmdGpu::NumberFormat::Sint;
return is_integer;
}
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
};
using TextureBufferResourceList = boost::container::static_vector<TextureBufferResource, 16>;
struct ImageResource { struct ImageResource {
u32 sgpr_base; u32 sgpr_base;
u32 dword_offset; u32 dword_offset;
@ -207,6 +222,7 @@ struct Info {
s8 instance_offset_sgpr = -1; s8 instance_offset_sgpr = -1;
BufferResourceList buffers; BufferResourceList buffers;
TextureBufferResourceList texture_buffers;
ImageResourceList images; ImageResourceList images;
SamplerResourceList samplers; SamplerResourceList samplers;
@ -222,12 +238,13 @@ struct Info {
u64 pgm_hash{}; u64 pgm_hash{};
u32 shared_memory_size{}; u32 shared_memory_size{};
bool has_storage_images{}; bool has_storage_images{};
bool has_texel_buffers{};
bool has_discard{}; bool has_discard{};
bool has_image_gather{}; bool has_image_gather{};
bool has_image_query{}; bool has_image_query{};
bool uses_group_quad{}; bool uses_group_quad{};
bool uses_shared{}; bool uses_shared{};
bool uses_fp16{}; bool uses_fp16{true};
bool uses_step_rates{}; bool uses_step_rates{};
bool translation_failed{}; // indicates that shader has unsupported instructions bool translation_failed{}; // indicates that shader has unsupported instructions
@ -243,7 +260,7 @@ struct Info {
} }
size_t NumBindings() const noexcept { size_t NumBindings() const noexcept {
return buffers.size() + images.size() + samplers.size(); return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
} }
u64 GetStageSpecializedKey(u32 binding = 0) const noexcept { u64 GetStageSpecializedKey(u32 binding = 0) const noexcept {
@ -251,6 +268,9 @@ struct Info {
for (const auto& buffer : buffers) { for (const auto& buffer : buffers) {
key = HashCombine(key, buffer.GetKey(*this)); key = HashCombine(key, buffer.GetKey(*this));
} }
for (const auto& buffer : texture_buffers) {
key = HashCombine(key, buffer.GetKey(*this));
}
for (const auto& image : images) { for (const auto& image : images) {
key = HashCombine(key, image.GetKey(*this)); key = HashCombine(key, image.GetKey(*this));
} }
@ -274,6 +294,10 @@ constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexc
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset); return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
} }
constexpr AmdGpu::Buffer TextureBufferResource::GetVsharp(const Info& info) const noexcept {
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
constexpr AmdGpu::Image ImageResource::GetTsharp(const Info& info) const noexcept { constexpr AmdGpu::Image ImageResource::GetTsharp(const Info& info) const noexcept {
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset); return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
} }

View File

@ -33,6 +33,15 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
.stageFlags = vk::ShaderStageFlagBits::eCompute, .stageFlags = vk::ShaderStageFlagBits::eCompute,
}); });
} }
for (const auto& tex_buffer : info->texture_buffers) {
bindings.push_back({
.binding = binding++,
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
for (const auto& image : info->images) { for (const auto& image : info->images) {
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
@ -91,6 +100,7 @@ ComputePipeline::~ComputePipeline() = default;
bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const { VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures. // Bind resource buffers and textures.
boost::container::static_vector<vk::BufferView, 8> buffer_views;
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos; boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos; boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes; boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
@ -141,6 +151,41 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
}); });
} }
for (const auto& tex_buffer : info->texture_buffers) {
const auto vsharp = tex_buffer.GetVsharp(*info);
vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
const VAddr address = vsharp.base_address;
const u32 size = vsharp.GetSize();
if (tex_buffer.is_written) {
texture_cache.InvalidateMemory(address, size, true);
}
const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
"Texel buffer stride must match format stride");
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % fmt_stride == 0);
push_data.AddOffset(binding, adjust / fmt_stride);
}
buffer_view = vk_buffer->View(offset_aligned, size + adjust, vsharp.GetDataFmt(),
vsharp.GetNumberFmt());
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.pTexelBufferView = &buffer_view,
});
}
for (const auto& image_desc : info->images) { for (const auto& image_desc : info->images) {
const auto tsharp = image_desc.GetTsharp(*info); const auto tsharp = image_desc.GetTsharp(*info);
VideoCore::ImageInfo image_info{tsharp}; VideoCore::ImageInfo image_info{tsharp};

View File

@ -316,6 +316,15 @@ void GraphicsPipeline::BuildDescSetLayout() {
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
}); });
} }
for (const auto& tex_buffer : stage->texture_buffers) {
bindings.push_back({
.binding = binding++,
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
for (const auto& image : stage->images) { for (const auto& image : stage->images) {
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
@ -346,6 +355,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
VideoCore::BufferCache& buffer_cache, VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const { VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures. // Bind resource buffers and textures.
boost::container::static_vector<vk::BufferView, 8> buffer_views;
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos; boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos; boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes; boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
@ -394,6 +404,38 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
}); });
} }
for (const auto& tex_buffer : stage->texture_buffers) {
const auto vsharp = tex_buffer.GetVsharp(*stage);
vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
const VAddr address = vsharp.base_address;
const u32 size = vsharp.GetSize();
const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
"Texel buffer stride must match format stride");
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % fmt_stride == 0);
push_data.AddOffset(binding, adjust / fmt_stride);
}
buffer_view = vk_buffer->View(offset, size + adjust, vsharp.GetDataFmt(),
vsharp.GetNumberFmt());
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.pTexelBufferView = &buffer_view,
});
}
boost::container::static_vector<AmdGpu::Image, 16> tsharps; boost::container::static_vector<AmdGpu::Image, 16> tsharps;
for (const auto& image_desc : stage->images) { for (const auto& image_desc : stage->images) {
const auto tsharp = image_desc.GetTsharp(*stage); const auto tsharp = image_desc.GetTsharp(*stage);

View File

@ -192,6 +192,11 @@ public:
return properties.limits.minStorageBufferOffsetAlignment; return properties.limits.minStorageBufferOffsetAlignment;
} }
/// Returns the minimum required alignment for texel buffers
vk::DeviceSize TexelBufferMinAlignment() const {
return properties.limits.minTexelBufferOffsetAlignment;
}
/// Returns the minimum alignemt required for accessing host-mapped device memory /// Returns the minimum alignemt required for accessing host-mapped device memory
vk::DeviceSize NonCoherentAtomSize() const { vk::DeviceSize NonCoherentAtomSize() const {
return properties.limits.nonCoherentAtomSize; return properties.limits.nonCoherentAtomSize;

View File

@ -273,6 +273,10 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span<cons
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
perm_idx != 0 ? "(permutation)" : ""); perm_idx != 0 ? "(permutation)" : "");
if (Config::dumpShaders()) {
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
}
block_pool.ReleaseContents(); block_pool.ReleaseContents();
inst_pool.ReleaseContents(); inst_pool.ReleaseContents();
const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);
@ -281,7 +285,7 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span<cons
const u64 key = info.GetStageSpecializedKey(binding); const u64 key = info.GetStageSpecializedKey(binding);
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
if (Config::dumpShaders()) { if (Config::dumpShaders()) {
DumpShader(spv, key, info.stage, perm_idx, "spv"); DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
} }
// Create module and set name to hash in renderdoc // Create module and set name to hash in renderdoc