recompiler: pack 4xf32 to 8_8_8_8 Unorm if buffer has such format
This commit is contained in:
parent
ad3b6c793c
commit
3450135a3f
|
@ -425,13 +425,36 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad
|
||||||
return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address);
|
return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <u32 N>
|
||||||
|
static Id PackF32ToUnorm(EmitContext& ctx, Id value) {
|
||||||
|
Id result{};
|
||||||
|
for (u32 i = 0; i < N; i++) {
|
||||||
|
const Id comp =
|
||||||
|
ctx.OpConvertFToU(ctx.U32[1], ctx.OpFMul(ctx.F32[1], ctx.ConstF32(255.0f),
|
||||||
|
ctx.OpCompositeExtract(ctx.F32[1], value, i)));
|
||||||
|
if (i == 0) {
|
||||||
|
result = comp;
|
||||||
|
} else {
|
||||||
|
const Id comp_shifted = ctx.OpShiftLeftLogical(
|
||||||
|
ctx.U32[1], comp, ctx.OpIMul(ctx.U32[1], ctx.ConstU32(i), ctx.ConstU32(8u)));
|
||||||
|
result = ctx.OpBitwiseOr(ctx.U32[1], result, comp_shifted);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template <u32 N>
|
template <u32 N>
|
||||||
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||||
auto& buffer = ctx.buffers[handle];
|
auto& buffer = ctx.buffers[handle];
|
||||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||||
|
switch (buffer.buffer.GetNumberFmt()) {
|
||||||
|
case AmdGpu::NumberFormat::Uint:
|
||||||
|
case AmdGpu::NumberFormat::Float: {
|
||||||
|
ASSERT(buffer.buffer.GetStride() == sizeof(float) * N);
|
||||||
if constexpr (N == 1) {
|
if constexpr (N == 1) {
|
||||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
const Id ptr{
|
||||||
|
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||||
ctx.OpStore(ptr, value);
|
ctx.OpStore(ptr, value);
|
||||||
} else {
|
} else {
|
||||||
for (u32 i = 0; i < N; i++) {
|
for (u32 i = 0; i < N; i++) {
|
||||||
|
@ -441,6 +464,17 @@ static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id va
|
||||||
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
|
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
case AmdGpu::NumberFormat::Unorm: {
|
||||||
|
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
|
||||||
|
ctx.OpStore(ptr, ctx.OpBitcast(ctx.F32[1], PackF32ToUnorm<N>(ctx, value)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||||
|
|
Loading…
Reference in New Issue