shader_recompiler: basic implementation of `BUFFER_STORE_FORMAT_` (#431)

* shader_recompiler: basic implementation of buffer store w\ fmt conversion

* added `Format16` dfmt
This commit is contained in:
psucien 2024-08-15 00:15:07 +02:00 committed by GitHub
parent 6f4e1a47b9
commit 9adc638220
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 165 additions and 10 deletions

View File

@ -467,4 +467,96 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
EmitStoreBufferF32xN<1>(ctx, handle, address, value); EmitStoreBufferF32xN<1>(ctx, handle, address, value);
} }
static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
u32 bit_width) {
switch (format) {
case AmdGpu::NumberFormat::Unorm:
return ctx.OpConvertFToU(
ctx.U32[1], ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width)))));
case AmdGpu::NumberFormat::Uint:
return ctx.OpBitcast(ctx.U32[1], value);
case AmdGpu::NumberFormat::Float:
return value;
default:
UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
magic_enum::enum_name(format));
}
}
template <u32 N>
static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
auto& buffer = ctx.buffers[handle];
const auto format = buffer.buffer.GetDataFmt();
const auto num_format = buffer.buffer.GetNumberFmt();
switch (format) {
case AmdGpu::DataFormat::FormatInvalid:
return;
case AmdGpu::DataFormat::Format8_8_8_8:
case AmdGpu::DataFormat::Format16:
case AmdGpu::DataFormat::Format32:
case AmdGpu::DataFormat::Format32_32_32_32: {
ASSERT(N == AmdGpu::NumComponents(format));
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
Id packed_value{};
for (u32 i = 0; i < N; i++) {
const u32 bit_width = AmdGpu::ComponentBits(format, i);
const u32 bit_offset = AmdGpu::ComponentOffset(format, i) % 32;
const Id comp{ConvertF32ToFormat(
ctx, N == 1 ? value : ctx.OpCompositeExtract(ctx.F32[1], value, i), num_format,
bit_width)};
if (bit_width == 32) {
if constexpr (N == 1) {
ctx.OpStore(ptr, comp);
} else {
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id,
ctx.u32_zero_value, index_i);
ctx.OpStore(ptr, comp);
}
} else {
if (i == 0) {
packed_value = comp;
} else {
packed_value =
ctx.OpBitFieldInsert(ctx.U32[1], packed_value, comp,
ctx.ConstU32(bit_offset), ctx.ConstU32(bit_width));
}
if (i == N - 1) {
ctx.OpStore(ptr, packed_value);
}
}
}
} break;
default:
UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
}
}
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value);
}
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value);
}
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value);
}
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -76,6 +76,10 @@ void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);

View File

@ -186,7 +186,7 @@ public:
// Vector Memory // Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
// Vector interpolation // Vector interpolation
void V_INTERP_P2_F32(const GcnInst& inst); void V_INTERP_P2_F32(const GcnInst& inst);

View File

@ -53,6 +53,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::IMAGE_GET_RESINFO: case Opcode::IMAGE_GET_RESINFO:
return IMAGE_GET_RESINFO(inst); return IMAGE_GET_RESINFO(inst);
// Buffer load operations
case Opcode::TBUFFER_LOAD_FORMAT_X: case Opcode::TBUFFER_LOAD_FORMAT_X:
return BUFFER_LOAD_FORMAT(1, true, true, inst); return BUFFER_LOAD_FORMAT(1, true, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XY: case Opcode::TBUFFER_LOAD_FORMAT_XY:
@ -61,6 +62,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_LOAD_FORMAT(3, true, true, inst); return BUFFER_LOAD_FORMAT(3, true, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XYZW: case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
return BUFFER_LOAD_FORMAT(4, true, true, inst); return BUFFER_LOAD_FORMAT(4, true, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_X: case Opcode::BUFFER_LOAD_FORMAT_X:
return BUFFER_LOAD_FORMAT(1, false, true, inst); return BUFFER_LOAD_FORMAT(1, false, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_XY: case Opcode::BUFFER_LOAD_FORMAT_XY:
@ -69,6 +71,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_LOAD_FORMAT(3, false, true, inst); return BUFFER_LOAD_FORMAT(3, false, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_XYZW: case Opcode::BUFFER_LOAD_FORMAT_XYZW:
return BUFFER_LOAD_FORMAT(4, false, true, inst); return BUFFER_LOAD_FORMAT(4, false, true, inst);
case Opcode::BUFFER_LOAD_DWORD: case Opcode::BUFFER_LOAD_DWORD:
return BUFFER_LOAD_FORMAT(1, false, false, inst); return BUFFER_LOAD_FORMAT(1, false, false, inst);
case Opcode::BUFFER_LOAD_DWORDX2: case Opcode::BUFFER_LOAD_DWORDX2:
@ -77,16 +80,25 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_LOAD_FORMAT(3, false, false, inst); return BUFFER_LOAD_FORMAT(3, false, false, inst);
case Opcode::BUFFER_LOAD_DWORDX4: case Opcode::BUFFER_LOAD_DWORDX4:
return BUFFER_LOAD_FORMAT(4, false, false, inst); return BUFFER_LOAD_FORMAT(4, false, false, inst);
// Buffer store operations
case Opcode::BUFFER_STORE_FORMAT_X: case Opcode::BUFFER_STORE_FORMAT_X:
case Opcode::BUFFER_STORE_DWORD: return BUFFER_STORE_FORMAT(1, false, true, inst);
return BUFFER_STORE_FORMAT(1, false, inst); case Opcode::BUFFER_STORE_FORMAT_XY:
case Opcode::BUFFER_STORE_DWORDX2: return BUFFER_STORE_FORMAT(2, false, true, inst);
return BUFFER_STORE_FORMAT(2, false, inst); case Opcode::BUFFER_STORE_FORMAT_XYZ:
case Opcode::BUFFER_STORE_DWORDX3: return BUFFER_STORE_FORMAT(3, false, true, inst);
return BUFFER_STORE_FORMAT(3, false, inst);
case Opcode::BUFFER_STORE_FORMAT_XYZW: case Opcode::BUFFER_STORE_FORMAT_XYZW:
return BUFFER_STORE_FORMAT(4, false, true, inst);
case Opcode::BUFFER_STORE_DWORD:
return BUFFER_STORE_FORMAT(1, false, false, inst);
case Opcode::BUFFER_STORE_DWORDX2:
return BUFFER_STORE_FORMAT(2, false, false, inst);
case Opcode::BUFFER_STORE_DWORDX3:
return BUFFER_STORE_FORMAT(3, false, false, inst);
case Opcode::BUFFER_STORE_DWORDX4: case Opcode::BUFFER_STORE_DWORDX4:
return BUFFER_STORE_FORMAT(4, false, inst); return BUFFER_STORE_FORMAT(4, false, false, inst);
default: default:
LogMissingOpcode(inst); LogMissingOpcode(inst);
} }
@ -359,7 +371,8 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
} }
} }
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) { void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format,
const GcnInst& inst) {
const auto& mtbuf = inst.control.mtbuf; const auto& mtbuf = inst.control.mtbuf;
const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::ScalarReg sharp{inst.src[2].code * 4};
@ -410,8 +423,12 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
const IR::Value handle = const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
if (is_format) {
ir.StoreBufferFormat(num_dwords, handle, address, value, info);
} else {
ir.StoreBuffer(num_dwords, handle, address, value, info); ir.StoreBuffer(num_dwords, handle, address, value, info);
} }
}
void Translator::IMAGE_GET_LOD(const GcnInst& inst) { void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
const auto& mimg = inst.control.mimg; const auto& mimg = inst.control.mimg;

View File

@ -347,6 +347,26 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
} }
} }
void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
const Value& data, BufferInstInfo info) {
switch (num_dwords) {
case 1:
Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
break;
case 2:
Inst(Opcode::StoreBufferFormatF32x2, Flags{info}, handle, address, data);
break;
case 3:
Inst(Opcode::StoreBufferFormatF32x3, Flags{info}, handle, address, data);
break;
case 4:
Inst(Opcode::StoreBufferFormatF32x4, Flags{info}, handle, address, data);
break;
default:
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
}
}
U32 IREmitter::LaneId() { U32 IREmitter::LaneId() {
return Inst<U32>(Opcode::LaneId); return Inst<U32>(Opcode::LaneId);
} }

View File

@ -93,6 +93,8 @@ public:
BufferInstInfo info); BufferInstInfo info);
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
BufferInstInfo info); BufferInstInfo info);
void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
const Value& data, BufferInstInfo info);
[[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId(); [[nodiscard]] U32 WarpId();

View File

@ -55,6 +55,10 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferF32x2: case Opcode::StoreBufferF32x2:
case Opcode::StoreBufferF32x3: case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4: case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferFormatF32:
case Opcode::StoreBufferFormatF32x2:
case Opcode::StoreBufferFormatF32x3:
case Opcode::StoreBufferFormatF32x4:
case Opcode::StoreBufferU32: case Opcode::StoreBufferU32:
case Opcode::WriteSharedU128: case Opcode::WriteSharedU128:
case Opcode::WriteSharedU64: case Opcode::WriteSharedU64:

View File

@ -82,6 +82,10 @@ OPCODE(StoreBufferF32, Void, Opaq
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32, )
OPCODE(StoreBufferFormatF32x2, Void, Opaque, Opaque, F32x2, )
OPCODE(StoreBufferFormatF32x3, Void, Opaque, Opaque, F32x3, )
OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
// Vector utility // Vector utility

View File

@ -37,6 +37,10 @@ bool IsBufferInstruction(const IR::Inst& inst) {
case IR::Opcode::StoreBufferF32x2: case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3: case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4: case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32: case IR::Opcode::StoreBufferU32:
return true; return true;
default: default:
@ -73,6 +77,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
case IR::Opcode::LoadBufferFormatF32x2: case IR::Opcode::LoadBufferFormatF32x2:
case IR::Opcode::LoadBufferFormatF32x3: case IR::Opcode::LoadBufferFormatF32x3:
case IR::Opcode::LoadBufferFormatF32x4: case IR::Opcode::LoadBufferFormatF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
switch (num_format) { switch (num_format) {
case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm: case AmdGpu::NumberFormat::Snorm:
@ -112,6 +120,10 @@ bool IsBufferStore(const IR::Inst& inst) {
case IR::Opcode::StoreBufferF32x2: case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3: case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4: case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32: case IR::Opcode::StoreBufferU32:
return true; return true;
default: default: