shader_recompiler: fix for divergence scope detection

This commit is contained in:
psucien 2024-08-25 20:25:20 +02:00
parent c9c8673099
commit e8f96b57ed
4 changed files with 24 additions and 21 deletions

View File

@ -21,13 +21,13 @@ struct Compare {
} }
}; };
static IR::Condition MakeCondition(Opcode opcode) { static IR::Condition MakeCondition(const GcnInst& inst) {
if (IsCmpxOpcode(opcode)) { if (inst.IsCmpx()) {
ASSERT(opcode == Opcode::V_CMPX_NE_U32); ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
return IR::Condition::Execnz; return IR::Condition::Execnz;
} }
switch (opcode) { switch (inst.opcode) {
case Opcode::S_CBRANCH_SCC0: case Opcode::S_CBRANCH_SCC0:
return IR::Condition::Scc0; return IR::Condition::Scc0;
case Opcode::S_CBRANCH_SCC1: case Opcode::S_CBRANCH_SCC1:
@ -98,7 +98,8 @@ void CFG::EmitDivergenceLabels() {
// While this instruction does not save EXEC it is often used paired // While this instruction does not save EXEC it is often used paired
// with SAVEEXEC to mask the threads that didn't pass the condition // with SAVEEXEC to mask the threads that didn't pass the condition
// of initial branch. // of initial branch.
inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32; (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
inst.opcode == Opcode::V_CMPX_NE_U32;
}; };
const auto is_close_scope = [](const GcnInst& inst) { const auto is_close_scope = [](const GcnInst& inst) {
// Closing an EXEC scope can be either a branch instruction // Closing an EXEC scope can be either a branch instruction
@ -108,7 +109,8 @@ void CFG::EmitDivergenceLabels() {
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch. // Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
// Those instructions need to be wrapped in the condition as well so allow branch // Those instructions need to be wrapped in the condition as well so allow branch
// as end scope instruction. // as end scope instruction.
inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64; inst.opcode == Opcode::S_CBRANCH_EXECZ ||
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
}; };
// Since we will be adding new labels, avoid iterating those as well. // Since we will be adding new labels, avoid iterating those as well.
@ -175,7 +177,7 @@ void CFG::EmitBlocks() {
block->begin_index = GetIndex(start); block->begin_index = GetIndex(start);
block->end_index = end_index; block->end_index = end_index;
block->end_inst = end_inst; block->end_inst = end_inst;
block->cond = MakeCondition(end_inst.opcode); block->cond = MakeCondition(end_inst);
blocks.insert(*block); blocks.insert(*block);
} }
} }

View File

@ -47,4 +47,18 @@ bool GcnInst::IsConditionalBranch() const {
return false; return false;
} }
bool GcnInst::IsCmpx() const {
if ((opcode >= Opcode::V_CMPX_F_F32 && opcode <= Opcode::V_CMPX_T_F32) ||
(opcode >= Opcode::V_CMPX_F_F64 && opcode <= Opcode::V_CMPX_T_F64) ||
(opcode >= Opcode::V_CMPSX_F_F32 && opcode <= Opcode::V_CMPSX_T_F32) ||
(opcode >= Opcode::V_CMPSX_F_F64 && opcode <= Opcode::V_CMPSX_T_F64) ||
(opcode >= Opcode::V_CMPX_F_I32 && opcode <= Opcode::V_CMPX_CLASS_F32) ||
(opcode >= Opcode::V_CMPX_F_I64 && opcode <= Opcode::V_CMPX_CLASS_F64) ||
(opcode >= Opcode::V_CMPX_F_U32 && opcode <= Opcode::V_CMPX_T_U32) ||
(opcode >= Opcode::V_CMPX_F_U64 && opcode <= Opcode::V_CMPX_T_U64)) {
return true;
}
return false;
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -203,6 +203,7 @@ struct GcnInst {
bool IsUnconditionalBranch() const; bool IsUnconditionalBranch() const;
bool IsConditionalBranch() const; bool IsConditionalBranch() const;
bool IsFork() const; bool IsFork() const;
bool IsCmpx() const;
}; };
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -2194,20 +2194,6 @@ enum class Opcode : u32 {
EXP = 0 + (u32)OpcodeMap::OP_MAP_EXP, EXP = 0 + (u32)OpcodeMap::OP_MAP_EXP,
}; };
/// Compile-time predicate: true when `op` lies inside any of the inclusive
/// V_CMPX / V_CMPSX opcode ranges listed below, false otherwise.
static constexpr bool IsCmpxOpcode(Opcode op) {
    // Each parenthesised term is one inclusive opcode range; the I32/I64
    // ranges run up to the CLASS_F32/CLASS_F64 opcodes respectively.
    return (Opcode::V_CMPX_F_F32 <= op && op <= Opcode::V_CMPX_T_F32) ||
           (Opcode::V_CMPX_F_F64 <= op && op <= Opcode::V_CMPX_T_F64) ||
           (Opcode::V_CMPSX_F_F32 <= op && op <= Opcode::V_CMPSX_T_F32) ||
           (Opcode::V_CMPSX_F_F64 <= op && op <= Opcode::V_CMPSX_T_F64) ||
           (Opcode::V_CMPX_F_I32 <= op && op <= Opcode::V_CMPX_CLASS_F32) ||
           (Opcode::V_CMPX_F_I64 <= op && op <= Opcode::V_CMPX_CLASS_F64) ||
           (Opcode::V_CMPX_F_U32 <= op && op <= Opcode::V_CMPX_T_U32) ||
           (Opcode::V_CMPX_F_U64 <= op && op <= Opcode::V_CMPX_T_U64);
}
enum class EncodingMask : u32 { enum class EncodingMask : u32 {
MASK_9bit = 0x000001FFULL << 23, MASK_9bit = 0x000001FFULL << 23,
MASK_7bit = 0x0000007FULL << 25, MASK_7bit = 0x0000007FULL << 25,