Added Legacy Min/Max ops (#266)

* Forwarded V_MAX_LEGACY_F32 to V_MAX3_F32. Fixes a translation error in Geometry Wars 3.

* Forwarded to the correct op

* Implemented Legacy Max/Min using NMax/NMin

* Added extra argument to Min/Max op codes

* Removed the extra translator functions; replaced them with a bool parameter

* Formatting
This commit is contained in:
Stolas 2024-07-08 19:24:12 +10:00 committed by GitHub
parent 550e05b3ca
commit 2620919f0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 39 additions and 18 deletions

View File

@ -51,7 +51,11 @@ Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
}
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
// Emits the maximum of two F32 operands `a` and `b`.
// `is_legacy` selects which emitter op is used: ctx.OpNMax when true,
// ctx.OpFMax otherwise. The translator passes is_legacy = true for
// V_MAX_LEGACY_F32.
// NOTE(review): presumably OpNMax maps to GLSL.std.450 NMax (NaN-aware max) —
// confirm against the SPIR-V emitter library and the hardware's legacy NaN rules.
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy) {
if (is_legacy) {
// Legacy variant: use the N* flavour of the max instruction.
return ctx.OpNMax(ctx.F32[1], a, b);
}
return ctx.OpFMax(ctx.F32[1], a, b);
}
@ -59,7 +63,11 @@ Id EmitFPMax64(EmitContext& ctx, Id a, Id b) {
return ctx.OpFMax(ctx.F64[1], a, b);
}
Id EmitFPMin32(EmitContext& ctx, Id a, Id b) {
// Emits the minimum of two F32 operands `a` and `b`.
// `is_legacy` selects which emitter op is used: ctx.OpNMin when true,
// ctx.OpFMin otherwise. The translator passes is_legacy = true for
// V_MIN_LEGACY_F32.
// NOTE(review): presumably OpNMin maps to GLSL.std.450 NMin (NaN-aware min) —
// confirm against the SPIR-V emitter library and the hardware's legacy NaN rules.
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy) {
if (is_legacy) {
// Legacy variant: use the N* flavour of the min instruction.
return ctx.OpNMin(ctx.F32[1], a, b);
}
return ctx.OpFMin(ctx.F32[1], a, b);
}

View File

@ -165,9 +165,9 @@ Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);

View File

@ -639,6 +639,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_MIN3_F32:
translator.V_MIN3_F32(inst);
break;
case Opcode::V_MIN_LEGACY_F32:
translator.V_MIN_F32(inst, true);
break;
case Opcode::V_MADMK_F32:
translator.V_MADMK_F32(inst);
break;
@ -889,6 +892,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_MAD_LEGACY_F32:
translator.V_MAD_F32(inst);
break;
case Opcode::V_MAX_LEGACY_F32:
translator.V_MAX_F32(inst, true);
break;
case Opcode::V_RSQ_LEGACY_F32:
case Opcode::V_RSQ_CLAMP_F32:
translator.V_RSQ_F32(inst);

View File

@ -111,14 +111,14 @@ public:
void V_RCP_F32(const GcnInst& inst);
void V_FMA_F32(const GcnInst& inst);
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);
void V_MAX_U32(bool is_signed, const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst);
void V_SIN_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst);
void V_EXP_F32(const GcnInst& inst);
void V_SQRT_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
void V_MIN3_F32(const GcnInst& inst);
void V_MADMK_F32(const GcnInst& inst);
void V_CUBEMA_F32(const GcnInst& inst);

View File

@ -203,10 +203,10 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
}
}
void Translator::V_MAX_F32(const GcnInst& inst) {
// Translates V_MAX_F32: reads inst.src[0] and inst.src[1] as IR::F32 values and
// writes the IR FPMax of the two to inst.dst[0].
// `is_legacy` is forwarded to IREmitter::FPMax; the V_MAX_LEGACY_F32 opcode case
// calls this function with is_legacy = true.
void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
// NOTE(review): this listing is a diff shown without +/- markers; the call
// below without is_legacy appears to be the pre-change line — confirm.
SetDst(inst.dst[0], ir.FPMax(src0, src1));
SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy));
}
void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
@ -240,10 +240,10 @@ void Translator::V_SQRT_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPSqrt(src0));
}
void Translator::V_MIN_F32(const GcnInst& inst) {
// Translates V_MIN_F32: reads inst.src[0] and inst.src[1] as IR::F32 values and
// writes the IR FPMin of the two to inst.dst[0].
// `is_legacy` is forwarded to IREmitter::FPMin; the V_MIN_LEGACY_F32 opcode case
// calls this function with is_legacy = true.
void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
// NOTE(review): this listing is a diff shown without +/- markers; the call
// below without is_legacy appears to be the pre-change line — confirm.
SetDst(inst.dst[0], ir.FPMin(src0, src1));
SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy));
}
void Translator::V_MIN3_F32(const GcnInst& inst) {

View File

@ -865,28 +865,35 @@ U1 IREmitter::FPUnordered(const F32F64& lhs, const F32F64& rhs) {
return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
}
F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs) {
// Builds an IR floating-point max instruction for `lhs` and `rhs`.
// Both operands must have the same type. F32 operands produce an FPMax32
// instruction that carries `is_legacy` as an extra operand; F64 operands produce
// FPMax64 and do not accept is_legacy. Any other operand type is reported through
// ThrowInvalidType.
F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F32:
// NOTE(review): this listing is a diff shown without +/- markers; the return
// below without is_legacy appears to be the pre-change line — confirm.
return Inst<F32>(Opcode::FPMax32, lhs, rhs);
return Inst<F32>(Opcode::FPMax32, lhs, rhs, is_legacy);
case Type::F64:
// The legacy flag is only supported on the 32-bit opcode.
if (is_legacy) {
UNREACHABLE_MSG("F64 cannot be used with LEGACY ops");
}
return Inst<F64>(Opcode::FPMax64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs) {
F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F32:
return Inst<F32>(Opcode::FPMin32, lhs, rhs);
return Inst<F32>(Opcode::FPMin32, lhs, rhs, is_legacy);
case Type::F64:
if (is_legacy) {
UNREACHABLE_MSG("F64 cannot be used with LEGACY ops");
}
return Inst<F64>(Opcode::FPMin64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());

View File

@ -149,8 +149,8 @@ public:
[[nodiscard]] U1 FPIsInf(const F32F64& value);
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);

View File

@ -154,9 +154,9 @@ OPCODE(FPAdd64, F64, F64,
OPCODE(FPSub32, F32, F32, F32, )
OPCODE(FPFma32, F32, F32, F32, F32, )
OPCODE(FPFma64, F64, F64, F64, F64, )
OPCODE(FPMax32, F32, F32, F32, )
// FPMax32 gains a trailing U1 entry: IREmitter::FPMax passes its is_legacy flag
// as the extra operand of this opcode.
OPCODE(FPMax32, F32, F32, F32, U1, )
OPCODE(FPMax64, F64, F64, F64, )
OPCODE(FPMin32, F32, F32, F32, )
// FPMin32 gains a trailing U1 entry: IREmitter::FPMin passes its is_legacy flag
// as the extra operand of this opcode.
OPCODE(FPMin32, F32, F32, F32, U1, )
OPCODE(FPMin64, F64, F64, F64, )
OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, F64, F64, F64, )