shader_recompiler: Normal gathers

This commit is contained in:
IndecisiveTurtle 2024-07-17 16:49:45 +03:00
parent 53fb73e95f
commit cd009cfec6
7 changed files with 38 additions and 15 deletions

View File

@ -9,6 +9,9 @@ namespace Shader::Backend::SPIRV {
struct ImageOperands { struct ImageOperands {
void Add(spv::ImageOperandsMask new_mask, Id value) { void Add(spv::ImageOperandsMask new_mask, Id value) {
if (!Sirit::ValidId(value)) {
return;
}
mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) | mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) |
static_cast<u32>(new_mask)); static_cast<u32>(new_mask));
operands.push_back(value); operands.push_back(value);
@ -25,9 +28,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands; ImageOperands operands;
if (Sirit::ValidId(offset)) { operands.Add(spv::ImageOperandsMask::Offset, offset);
operands.Add(spv::ImageOperandsMask::ConstOffset, offset);
}
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
operands.operands); operands.operands);
} }
@ -61,18 +62,30 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
} }
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, const IR::Value& offset2) { Id offset, Id offset2) {
UNREACHABLE_MSG("SPIR-V Instruction");
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref) {
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref); const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
operands.Add(spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp),
operands.mask, operands.operands);
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id offset, Id offset2, Id dref) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Offset, offset);
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref,
operands.mask, operands.operands);
} }
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,

View File

@ -358,10 +358,10 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
Id bias_lc, const IR::Value& offset); Id bias_lc, const IR::Value& offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
Id bias_lc, Id offset); Id bias_lc, Id offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, const IR::Value& offset2); Id offset, Id offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref); Id offset, Id offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms); Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);

View File

@ -456,6 +456,8 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
translator.IMAGE_GET_LOD(inst); translator.IMAGE_GET_LOD(inst);
break; break;
case Opcode::IMAGE_GATHER4_C: case Opcode::IMAGE_GATHER4_C:
case Opcode::IMAGE_GATHER4_LZ:
case Opcode::IMAGE_GATHER4_LZ_O:
translator.IMAGE_GATHER(inst); translator.IMAGE_GATHER(inst);
break; break;
case Opcode::IMAGE_STORE: case Opcode::IMAGE_STORE:

View File

@ -158,6 +158,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
info.force_level0.Assign(flags.test(MimgModifier::Level0)); info.force_level0.Assign(flags.test(MimgModifier::Level0));
info.explicit_lod.Assign(explicit_lod); info.explicit_lod.Assign(explicit_lod);
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
// Issue IR instruction, leaving unknown fields blank to patch later. // Issue IR instruction, leaving unknown fields blank to patch later.
const IR::Value texel = [&]() -> IR::Value { const IR::Value texel = [&]() -> IR::Value {

View File

@ -543,7 +543,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
if (inst_info.has_offset) { if (inst_info.has_offset) {
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
const u32 arg_pos = inst_info.is_depth ? 4 : 3; const bool is_gather = inst.GetOpcode() == IR::Opcode::ImageGather ||
inst.GetOpcode() == IR::Opcode::ImageGatherDref;
const u32 arg_pos = is_gather ? 2 : (inst_info.is_depth ? 4 : 3);
const IR::Value arg = inst.Arg(arg_pos); const IR::Value arg = inst.Arg(arg_pos);
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); }; const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); };

View File

@ -39,6 +39,7 @@ union TextureInstInfo {
BitField<3, 1, u32> force_level0; BitField<3, 1, u32> force_level0;
BitField<4, 1, u32> explicit_lod; BitField<4, 1, u32> explicit_lod;
BitField<5, 1, u32> has_offset; BitField<5, 1, u32> has_offset;
BitField<6, 2, u32> gather_comp;
}; };
union BufferInstInfo { union BufferInstInfo {

View File

@ -420,6 +420,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Uint) { num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32A32Uint; return vk::Format::eR32G32B32A32Uint;
} }
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR32G32B32A32Sint;
}
if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) { if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR8Sint; return vk::Format::eR8Sint;
} }