shader_recompiler: Add most image integer atomic ops

This commit is contained in:
IndecisiveTurtle 2024-07-02 00:43:22 +03:00
parent 91aed76920
commit 668f7673d9
11 changed files with 318 additions and 32 deletions

View File

@ -0,0 +1,80 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
namespace {
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
const Id semantics{ctx.u32_zero_value};
return {scope, semantics};
}
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.ConstU32(0U))};
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
}
} // Anonymous namespace
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
}
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicSMax);
}
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicAnd);
}
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicOr);
}
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicXor);
}
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange);
}
} // namespace Shader::Backend::SPIRV

View File

@ -356,6 +356,29 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id value);
Id EmitLaneId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);

View File

@ -338,6 +338,8 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
++binding;
}
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
if (info.samplers.empty()) {
return;
}

View File

@ -167,6 +167,7 @@ public:
Id workgroup_id{};
Id local_invocation_id{};
Id subgroup_local_invocation_id{};
Id image_u32{};
struct TextureDefinition {
Id id;

View File

@ -3429,48 +3429,48 @@ constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
// 17 = IMAGE_ATOMIC_ADD
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 18 = IMAGE_ATOMIC_SUB
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
{},
// 20 = IMAGE_ATOMIC_SMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
ScalarType::Sint32},
// 21 = IMAGE_ATOMIC_UMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 22 = IMAGE_ATOMIC_SMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Sint32,
ScalarType::Sint32},
// 23 = IMAGE_ATOMIC_UMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 24 = IMAGE_ATOMIC_AND
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 25 = IMAGE_ATOMIC_OR
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 26 = IMAGE_ATOMIC_XOR
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 27 = IMAGE_ATOMIC_INC
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 28 = IMAGE_ATOMIC_DEC
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 29 = IMAGE_ATOMIC_FCMPSWAP
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 30 = IMAGE_ATOMIC_FMIN
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 31 = IMAGE_ATOMIC_FMAX
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},
// 32 = IMAGE_SAMPLE
{InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32,
ScalarType::Float32},

View File

@ -379,6 +379,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_SAMPLE_L:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_ATOMIC_ADD:
translator.IMAGE_ATOMIC(AtomicOp::Add, inst);
break;
case Opcode::IMAGE_GET_LOD:
translator.IMAGE_GET_LOD(inst);
break;
@ -718,6 +721,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.V_MAD_I32_I24(inst);
break;
case Opcode::V_MUL_I32_I24:
case Opcode::V_MUL_U32_U24:
translator.V_MUL_I32_I24(inst);
break;
case Opcode::V_SUB_I32:

View File

@ -26,6 +26,25 @@ enum class ConditionOp : u32 {
TRU,
};
enum class AtomicOp : u32 {
Swap,
CmpSwap,
Add,
Sub,
Smin,
Umin,
Smax,
Umax,
And,
Or,
Xor,
Inc,
Dec,
FCmpSwap,
Fmin,
Fmax,
};
enum class NegateMode : u32 {
None,
Src1,
@ -154,6 +173,7 @@ public:
void IMAGE_STORE(const GcnInst& inst);
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
void IMAGE_GET_LOD(const GcnInst& inst);
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
// Export
void EXP(const GcnInst& inst);

View File

@ -212,10 +212,15 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
ASSERT(mimg.dmask == 0xF);
const IR::Value value = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(data_reg), ir.GetVectorReg<IR::F32>(data_reg + 1),
ir.GetVectorReg<IR::F32>(data_reg + 2), ir.GetVectorReg<IR::F32>(data_reg + 3));
boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
comps.push_back(ir.Imm32(0.f));
continue;
}
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
}
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
ir.ImageWrite(handle, body, value, {});
}
@ -322,4 +327,48 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
}
void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg val_reg{inst.dst[0].code};
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
const IR::Value prev = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.ImageAtomicExchange(handle, body, value, {});
case AtomicOp::Add:
return ir.ImageAtomicIAdd(handle, body, value, {});
case AtomicOp::Smin:
return ir.ImageAtomicIMin(handle, body, value, true, {});
case AtomicOp::Umin:
return ir.ImageAtomicUMin(handle, body, value, {});
case AtomicOp::Smax:
return ir.ImageAtomicIMax(handle, body, value, true, {});
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, {});
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, {});
case AtomicOp::Or:
return ir.ImageAtomicOr(handle, body, value, {});
case AtomicOp::Xor:
return ir.ImageAtomicXor(handle, body, value, {});
case AtomicOp::Inc:
return ir.ImageAtomicInc(handle, body, value, {});
case AtomicOp::Dec:
return ir.ImageAtomicDec(handle, body, value, {});
default:
UNREACHABLE();
}
}();
if (mimg.glc) {
ir.SetVectorReg(val_reg, IR::U32{prev});
}
}
} // namespace Shader::Gcn

View File

@ -1167,6 +1167,73 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicIAdd32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicSMin32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicUMin32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
bool is_signed, TextureInstInfo info) {
return is_signed ? ImageAtomicSMin(handle, coords, value, info)
: ImageAtomicUMin(handle, coords, value, info);
}
Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicSMax32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
bool is_signed, TextureInstInfo info) {
return is_signed ? ImageAtomicSMax(handle, coords, value, info)
: ImageAtomicUMax(handle, coords, value, info);
}
Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicInc32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicDec32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicAnd32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicOr32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicXor32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
const Value& offset, const F32& lod_clamp,
TextureInstInfo info) {

View File

@ -200,6 +200,33 @@ public:
[[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value);
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
[[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
const Value& value, bool is_signed, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
const Value& value, bool is_signed, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info);
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
const F32& bias, const Value& offset,
const F32& lod_clamp, TextureInstInfo info);

View File

@ -274,6 +274,19 @@ OPCODE(ImageGradient, F32x4, Opaq
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
// Image atomic operations
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 )