Merge pull request #165 from shadps4-emu/bad-phi
shader_recompiler: Address some minor issues
This commit is contained in:
commit
3faeba8f0c
|
@ -14,8 +14,8 @@ namespace Libraries::Kernel {
|
||||||
|
|
||||||
struct OrbisTimesec {
|
struct OrbisTimesec {
|
||||||
time_t t;
|
time_t t;
|
||||||
u64 west_sec;
|
u32 west_sec;
|
||||||
u64 dst_sec;
|
u32 dst_sec;
|
||||||
};
|
};
|
||||||
|
|
||||||
int32_t PS4_SYSV_ABI sceKernelReleaseDirectMemory(off_t start, size_t len);
|
int32_t PS4_SYSV_ABI sceKernelReleaseDirectMemory(off_t start, size_t len);
|
||||||
|
|
|
@ -95,9 +95,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::FragCoord:
|
case IR::Attribute::FragCoord: {
|
||||||
return ctx.OpLoad(ctx.F32[1],
|
const Id coord = ctx.OpLoad(
|
||||||
ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
|
ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
|
||||||
|
if (comp == 3) {
|
||||||
|
return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord);
|
||||||
|
}
|
||||||
|
return coord;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Read attribute {}", attr);
|
throw NotImplementedException("Read attribute {}", attr);
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,7 @@ struct Block : Hook {
|
||||||
EndClass end_class{};
|
EndClass end_class{};
|
||||||
Block* branch_true{};
|
Block* branch_true{};
|
||||||
Block* branch_false{};
|
Block* branch_false{};
|
||||||
|
bool is_dummy{};
|
||||||
};
|
};
|
||||||
|
|
||||||
class CFG {
|
class CFG {
|
||||||
|
|
|
@ -630,9 +630,11 @@ private:
|
||||||
break;
|
break;
|
||||||
case StatementType::Code: {
|
case StatementType::Code: {
|
||||||
ensure_block();
|
ensure_block();
|
||||||
|
if (!stmt.block->is_dummy) {
|
||||||
const u32 start = stmt.block->begin_index;
|
const u32 start = stmt.block->begin_index;
|
||||||
const u32 size = stmt.block->end_index - start + 1;
|
const u32 size = stmt.block->end_index - start + 1;
|
||||||
Translate(current_block, inst_list.subspan(start, size), info);
|
Translate(current_block, inst_list.subspan(start, size), info);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case StatementType::SetVariable: {
|
case StatementType::SetVariable: {
|
||||||
|
@ -808,7 +810,7 @@ private:
|
||||||
ObjectPool<IR::Inst>& inst_pool;
|
ObjectPool<IR::Inst>& inst_pool;
|
||||||
ObjectPool<IR::Block>& block_pool;
|
ObjectPool<IR::Block>& block_pool;
|
||||||
IR::AbstractSyntaxList& syntax_list;
|
IR::AbstractSyntaxList& syntax_list;
|
||||||
const Block dummy_flow_block{};
|
const Block dummy_flow_block{.is_dummy = true};
|
||||||
std::span<const GcnInst> inst_list;
|
std::span<const GcnInst> inst_list;
|
||||||
Info& info;
|
Info& info;
|
||||||
};
|
};
|
||||||
|
|
|
@ -55,26 +55,48 @@ void Translator::S_ANDN2_B64(const GcnInst& inst) {
|
||||||
const IR::U1 src0{get_src(inst.src[0])};
|
const IR::U1 src0{get_src(inst.src[0])};
|
||||||
const IR::U1 src1{get_src(inst.src[1])};
|
const IR::U1 src1{get_src(inst.src[1])};
|
||||||
const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))};
|
const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))};
|
||||||
SetDst(inst.dst[0], result);
|
|
||||||
ir.SetScc(result);
|
ir.SetScc(result);
|
||||||
|
switch (inst.dst[0].field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
ir.SetVcc(result);
|
||||||
|
break;
|
||||||
|
case OperandField::ExecLo:
|
||||||
|
ir.SetExec(result);
|
||||||
|
break;
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||||
// SGPR we have a special IR opcode for SGPRs that act as thread masks.
|
// SGPR we have a special IR opcode for SGPRs that act as thread masks.
|
||||||
|
ASSERT(inst.src[0].field == OperandField::VccLo);
|
||||||
const IR::U1 exec{ir.GetExec()};
|
const IR::U1 exec{ir.GetExec()};
|
||||||
|
const IR::U1 vcc{ir.GetVcc()};
|
||||||
|
|
||||||
// Mark destination SGPR as an EXEC context. This means we will use 1-bit
|
// Mark destination SGPR as an EXEC context. This means we will use 1-bit
|
||||||
// IR instruction whenever it's loaded.
|
// IR instruction whenever it's loaded.
|
||||||
ASSERT(inst.dst[0].field == OperandField::ScalarGPR);
|
switch (inst.dst[0].field) {
|
||||||
|
case OperandField::ScalarGPR: {
|
||||||
const u32 reg = inst.dst[0].code;
|
const u32 reg = inst.dst[0].code;
|
||||||
exec_contexts[reg] = true;
|
exec_contexts[reg] = true;
|
||||||
ir.SetThreadBitScalarReg(IR::ScalarReg(reg), exec);
|
ir.SetThreadBitScalarReg(IR::ScalarReg(reg), exec);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case OperandField::VccLo:
|
||||||
|
ir.SetVcc(exec);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
// Update EXEC.
|
// Update EXEC.
|
||||||
ASSERT(inst.src[0].field == OperandField::VccLo);
|
ir.SetExec(ir.LogicalAnd(exec, vcc));
|
||||||
ir.SetExec(ir.LogicalAnd(exec, ir.GetVcc()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_MOV_B64(const GcnInst& inst) {
|
void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||||
|
@ -82,18 +104,21 @@ void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||||
if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) {
|
if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const IR::U1 src0{GetSrc(inst.src[0])};
|
|
||||||
if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) {
|
if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) {
|
||||||
// Exec context push
|
// Exec context push
|
||||||
exec_contexts[inst.dst[0].code] = true;
|
exec_contexts[inst.dst[0].code] = true;
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), ir.GetExec());
|
||||||
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
||||||
inst.src[0].field == OperandField::ScalarGPR) {
|
inst.src[0].field == OperandField::ScalarGPR) {
|
||||||
// Exec context pop
|
// Exec context pop
|
||||||
exec_contexts[inst.src[0].code] = false;
|
exec_contexts[inst.src[0].code] = false;
|
||||||
} else if (inst.src[0].field != OperandField::ConstZero) {
|
ir.SetExec(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)));
|
||||||
|
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
||||||
|
inst.src[0].field == OperandField::ConstZero) {
|
||||||
|
ir.SetExec(ir.Imm1(false));
|
||||||
|
} else {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
SetDst(inst.dst[0], src0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
||||||
|
@ -114,9 +139,17 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
||||||
if (negate) {
|
if (negate) {
|
||||||
result = ir.LogicalNot(result);
|
result = ir.LogicalNot(result);
|
||||||
}
|
}
|
||||||
ASSERT(inst.dst[0].field == OperandField::VccLo);
|
|
||||||
ir.SetVcc(result);
|
|
||||||
ir.SetScc(result);
|
ir.SetScc(result);
|
||||||
|
switch (inst.dst[0].field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
ir.SetVcc(result);
|
||||||
|
break;
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_AND_B64(const GcnInst& inst) {
|
void Translator::S_AND_B64(const GcnInst& inst) {
|
||||||
|
@ -135,9 +168,17 @@ void Translator::S_AND_B64(const GcnInst& inst) {
|
||||||
const IR::U1 src0{get_src(inst.src[0])};
|
const IR::U1 src0{get_src(inst.src[0])};
|
||||||
const IR::U1 src1{get_src(inst.src[1])};
|
const IR::U1 src1{get_src(inst.src[1])};
|
||||||
const IR::U1 result = ir.LogicalAnd(src0, src1);
|
const IR::U1 result = ir.LogicalAnd(src0, src1);
|
||||||
ASSERT(inst.dst[0].field == OperandField::VccLo);
|
|
||||||
ir.SetVcc(result);
|
|
||||||
ir.SetScc(result);
|
ir.SetScc(result);
|
||||||
|
switch (inst.dst[0].field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
ir.SetVcc(result);
|
||||||
|
break;
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_ADD_I32(const GcnInst& inst) {
|
void Translator::S_ADD_I32(const GcnInst& inst) {
|
||||||
|
@ -169,6 +210,36 @@ void Translator::S_CSELECT_B32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)});
|
SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::S_CSELECT_B64(const GcnInst& inst) {
|
||||||
|
const auto get_src = [&](const InstOperand& operand) {
|
||||||
|
switch (operand.field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
return ir.GetVcc();
|
||||||
|
case OperandField::ExecLo:
|
||||||
|
return ir.GetExec();
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||||
|
case OperandField::ConstZero:
|
||||||
|
return ir.Imm1(false);
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const IR::U1 src0{get_src(inst.src[0])};
|
||||||
|
const IR::U1 src1{get_src(inst.src[1])};
|
||||||
|
const IR::U1 result{ir.Select(ir.GetScc(), src0, src1)};
|
||||||
|
switch (inst.dst[0].field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
ir.SetVcc(result);
|
||||||
|
break;
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::S_BFE_U32(const GcnInst& inst) {
|
void Translator::S_BFE_U32(const GcnInst& inst) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
@ -179,4 +250,12 @@ void Translator::S_BFE_U32(const GcnInst& inst) {
|
||||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::S_LSHL_B32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
const IR::U32 result = ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)));
|
||||||
|
SetDst(inst.dst[0], result);
|
||||||
|
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -5,30 +5,16 @@
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg,
|
|
||||||
const IR::U32U64& address) {
|
|
||||||
for (u32 i = 0; i < num_dwords; i++) {
|
|
||||||
if (handle.IsEmpty()) {
|
|
||||||
ir.SetScalarReg(dst_reg++, ir.ReadConst(address, ir.Imm32(i)));
|
|
||||||
} else {
|
|
||||||
const IR::U32 index = ir.IAdd(address, ir.Imm32(i));
|
|
||||||
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(handle, index));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||||
const auto& smrd = inst.control.smrd;
|
const auto& smrd = inst.control.smrd;
|
||||||
|
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
|
||||||
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||||
const IR::U32 offset =
|
const IR::Value base =
|
||||||
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
|
||||||
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||||
ir.Imm32(2))};
|
for (u32 i = 0; i < num_dwords; i++) {
|
||||||
const IR::U64 base =
|
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
|
||||||
ir.PackUint2x32(ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1)));
|
}
|
||||||
const IR::U64 address = ir.IAdd(base, offset);
|
|
||||||
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
|
||||||
Load(ir, num_dwords, {}, dst_reg, address);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||||
|
@ -37,8 +23,11 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||||
const IR::U32 dword_offset =
|
const IR::U32 dword_offset =
|
||||||
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
|
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
|
||||||
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
||||||
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||||
Load(ir, num_dwords, vsharp, dst_reg, dword_offset);
|
for (u32 i = 0; i < num_dwords; i++) {
|
||||||
|
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
|
||||||
|
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(vsharp, index));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -58,16 +58,13 @@ void Translator::EmitPrologue() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||||
// Input modifiers work on float values.
|
// Input modifiers work on float values.
|
||||||
force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;
|
force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;
|
||||||
|
|
||||||
IR::U1U32F32 value{};
|
IR::U32F32 value{};
|
||||||
switch (operand.field) {
|
switch (operand.field) {
|
||||||
case OperandField::ScalarGPR:
|
case OperandField::ScalarGPR:
|
||||||
if (exec_contexts[operand.code]) {
|
|
||||||
value = ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
|
||||||
}
|
|
||||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||||
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||||
} else {
|
} else {
|
||||||
|
@ -124,11 +121,12 @@ IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||||
case OperandField::ConstFloatNeg_2_0:
|
case OperandField::ConstFloatNeg_2_0:
|
||||||
value = ir.Imm32(-2.0f);
|
value = ir.Imm32(-2.0f);
|
||||||
break;
|
break;
|
||||||
case OperandField::ExecLo:
|
|
||||||
value = ir.GetExec();
|
|
||||||
break;
|
|
||||||
case OperandField::VccLo:
|
case OperandField::VccLo:
|
||||||
|
if (force_flt) {
|
||||||
|
value = ir.BitCast<IR::F32>(ir.GetVccLo());
|
||||||
|
} else {
|
||||||
value = ir.GetVccLo();
|
value = ir.GetVccLo();
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case OperandField::VccHi:
|
case OperandField::VccHi:
|
||||||
value = ir.GetVccHi();
|
value = ir.GetVccHi();
|
||||||
|
@ -146,8 +144,8 @@ IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) {
|
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
||||||
IR::U1U32F32 result = value;
|
IR::U32F32 result = value;
|
||||||
if (operand.output_modifier.multiplier != 0.f) {
|
if (operand.output_modifier.multiplier != 0.f) {
|
||||||
result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
|
result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
|
||||||
}
|
}
|
||||||
|
@ -156,14 +154,9 @@ void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) {
|
||||||
}
|
}
|
||||||
switch (operand.field) {
|
switch (operand.field) {
|
||||||
case OperandField::ScalarGPR:
|
case OperandField::ScalarGPR:
|
||||||
if (value.Type() == IR::Type::U1) {
|
|
||||||
return ir.SetThreadBitScalarReg(IR::ScalarReg(operand.code), result);
|
|
||||||
}
|
|
||||||
return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
|
return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
|
||||||
case OperandField::VectorGPR:
|
case OperandField::VectorGPR:
|
||||||
return ir.SetVectorReg(IR::VectorReg(operand.code), result);
|
return ir.SetVectorReg(IR::VectorReg(operand.code), result);
|
||||||
case OperandField::ExecLo:
|
|
||||||
return ir.SetExec(result);
|
|
||||||
case OperandField::VccLo:
|
case OperandField::VccLo:
|
||||||
return ir.SetVccLo(result);
|
return ir.SetVccLo(result);
|
||||||
case OperandField::VccHi:
|
case OperandField::VccHi:
|
||||||
|
@ -252,6 +245,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
break;
|
break;
|
||||||
case Opcode::S_WAITCNT:
|
case Opcode::S_WAITCNT:
|
||||||
break;
|
break;
|
||||||
|
case Opcode::S_LOAD_DWORDX4:
|
||||||
|
translator.S_LOAD_DWORD(4, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::S_LOAD_DWORDX8:
|
||||||
|
translator.S_LOAD_DWORD(8, inst);
|
||||||
|
break;
|
||||||
case Opcode::S_BUFFER_LOAD_DWORD:
|
case Opcode::S_BUFFER_LOAD_DWORD:
|
||||||
translator.S_BUFFER_LOAD_DWORD(1, inst);
|
translator.S_BUFFER_LOAD_DWORD(1, inst);
|
||||||
break;
|
break;
|
||||||
|
@ -352,9 +351,18 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::S_CMP_LG_U32:
|
case Opcode::S_CMP_LG_U32:
|
||||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::S_CMP_LG_I32:
|
||||||
|
translator.S_CMP(ConditionOp::LG, true, inst);
|
||||||
|
break;
|
||||||
case Opcode::S_CMP_EQ_I32:
|
case Opcode::S_CMP_EQ_I32:
|
||||||
translator.S_CMP(ConditionOp::EQ, true, inst);
|
translator.S_CMP(ConditionOp::EQ, true, inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::S_CMP_EQ_U32:
|
||||||
|
translator.S_CMP(ConditionOp::EQ, false, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::S_LSHL_B32:
|
||||||
|
translator.S_LSHL_B32(inst);
|
||||||
|
break;
|
||||||
case Opcode::V_CNDMASK_B32:
|
case Opcode::V_CNDMASK_B32:
|
||||||
translator.V_CNDMASK_B32(inst);
|
translator.V_CNDMASK_B32(inst);
|
||||||
break;
|
break;
|
||||||
|
@ -505,13 +513,21 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::S_CSELECT_B32:
|
case Opcode::S_CSELECT_B32:
|
||||||
translator.S_CSELECT_B32(inst);
|
translator.S_CSELECT_B32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::S_CSELECT_B64:
|
||||||
|
translator.S_CSELECT_B64(inst);
|
||||||
|
break;
|
||||||
case Opcode::S_BFE_U32:
|
case Opcode::S_BFE_U32:
|
||||||
translator.S_BFE_U32(inst);
|
translator.S_BFE_U32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_RNDNE_F32:
|
||||||
|
translator.V_RNDNE_F32(inst);
|
||||||
|
break;
|
||||||
case Opcode::S_NOP:
|
case Opcode::S_NOP:
|
||||||
case Opcode::S_CBRANCH_EXECZ:
|
case Opcode::S_CBRANCH_EXECZ:
|
||||||
case Opcode::S_CBRANCH_SCC0:
|
case Opcode::S_CBRANCH_SCC0:
|
||||||
case Opcode::S_CBRANCH_SCC1:
|
case Opcode::S_CBRANCH_SCC1:
|
||||||
|
case Opcode::S_CBRANCH_VCCNZ:
|
||||||
|
case Opcode::S_CBRANCH_VCCZ:
|
||||||
case Opcode::S_BRANCH:
|
case Opcode::S_BRANCH:
|
||||||
case Opcode::S_WQM_B64:
|
case Opcode::S_WQM_B64:
|
||||||
case Opcode::V_INTERP_P1_F32:
|
case Opcode::V_INTERP_P1_F32:
|
||||||
|
|
|
@ -46,7 +46,9 @@ public:
|
||||||
void S_AND_B32(const GcnInst& inst);
|
void S_AND_B32(const GcnInst& inst);
|
||||||
void S_LSHR_B32(const GcnInst& inst);
|
void S_LSHR_B32(const GcnInst& inst);
|
||||||
void S_CSELECT_B32(const GcnInst& inst);
|
void S_CSELECT_B32(const GcnInst& inst);
|
||||||
|
void S_CSELECT_B64(const GcnInst& inst);
|
||||||
void S_BFE_U32(const GcnInst& inst);
|
void S_BFE_U32(const GcnInst& inst);
|
||||||
|
void S_LSHL_B32(const GcnInst& inst);
|
||||||
|
|
||||||
// Scalar Memory
|
// Scalar Memory
|
||||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||||
|
@ -101,6 +103,7 @@ public:
|
||||||
void V_LSHR_B32(const GcnInst& inst);
|
void V_LSHR_B32(const GcnInst& inst);
|
||||||
void V_ASHRREV_I32(const GcnInst& inst);
|
void V_ASHRREV_I32(const GcnInst& inst);
|
||||||
void V_MAD_U32_U24(const GcnInst& inst);
|
void V_MAD_U32_U24(const GcnInst& inst);
|
||||||
|
void V_RNDNE_F32(const GcnInst& inst);
|
||||||
|
|
||||||
// Vector Memory
|
// Vector Memory
|
||||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||||
|
@ -121,8 +124,8 @@ public:
|
||||||
void EXP(const GcnInst& inst);
|
void EXP(const GcnInst& inst);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IR::U1U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
|
IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
|
||||||
void SetDst(const InstOperand& operand, const IR::U1U32F32& value);
|
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IR::IREmitter ir;
|
IR::IREmitter ir;
|
||||||
|
|
|
@ -33,7 +33,7 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
|
||||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
const IR::ScalarReg flag_reg{inst.src[2].code};
|
const IR::ScalarReg flag_reg{inst.src[2].code};
|
||||||
const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR
|
const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR
|
||||||
? ir.INotEqual(ir.GetScalarReg(flag_reg), ir.Imm32(0U))
|
? ir.GetThreadBitScalarReg(flag_reg)
|
||||||
: ir.GetVcc();
|
: ir.GetVcc();
|
||||||
|
|
||||||
// We can treat the instruction as integer most of the time, but when a source is
|
// We can treat the instruction as integer most of the time, but when a source is
|
||||||
|
@ -85,21 +85,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_MAD_F32(const GcnInst& inst) {
|
void Translator::V_MAD_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||||
const IR::F32 src2{GetSrc(inst.src[2])};
|
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
ir.SetVectorReg(dst_reg, ir.Fract(src0));
|
ir.SetVectorReg(dst_reg, ir.Fract(src0));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_ADD_F32(const GcnInst& inst) {
|
void Translator::V_ADD_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||||
SetDst(inst.dst[0], ir.FPAdd(src0, src1));
|
SetDst(inst.dst[0], ir.FPAdd(src0, src1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,14 +114,14 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
|
||||||
|
|
||||||
void Translator::V_MED3_F32(const GcnInst& inst) {
|
void Translator::V_MED3_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||||
const IR::F32 src2{GetSrc(inst.src[2])};
|
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||||
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
|
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
|
||||||
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
|
ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
|
||||||
}
|
}
|
||||||
|
@ -167,7 +167,17 @@ void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
}();
|
}();
|
||||||
|
|
||||||
|
switch (inst.dst[1].field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
ir.SetVcc(result);
|
ir.SetVcc(result);
|
||||||
|
break;
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_MAX_F32(const GcnInst& inst) {
|
void Translator::V_MAX_F32(const GcnInst& inst) {
|
||||||
|
@ -357,4 +367,9 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
|
||||||
V_MAD_I32_I24(inst);
|
V_MAD_I32_I24(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_RNDNE_F32(const GcnInst& inst) {
|
||||||
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
|
SetDst(inst.dst[0], ir.FPRoundEven(src0));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -273,8 +273,8 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
|
||||||
}*/
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::ReadConst(const U64& address, const U32& offset) {
|
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
||||||
return Inst<U32>(Opcode::ReadConst, address, offset);
|
return Inst<U32>(Opcode::ReadConst, base, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
|
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
|
||||||
|
|
|
@ -77,7 +77,7 @@ public:
|
||||||
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
|
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
|
||||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||||
|
|
||||||
[[nodiscard]] U32 ReadConst(const U64& address, const U32& offset);
|
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||||
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
|
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||||
|
|
||||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||||
|
|
|
@ -15,7 +15,7 @@ OPCODE(Epilogue, Void,
|
||||||
OPCODE(Discard, Void, )
|
OPCODE(Discard, Void, )
|
||||||
|
|
||||||
// Constant memory operations
|
// Constant memory operations
|
||||||
OPCODE(ReadConst, U32, U64, U32, )
|
OPCODE(ReadConst, U32, U32x2, U32, )
|
||||||
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||||
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
|
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,10 @@
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
void DeadCodeEliminationPass(IR::BlockList& program) {
|
void DeadCodeEliminationPass(IR::Program& program) {
|
||||||
// We iterate over the instructions in reverse order.
|
// We iterate over the instructions in reverse order.
|
||||||
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
||||||
for (IR::Block* const block : program) {
|
for (IR::Block* const block : program.post_order_blocks) {
|
||||||
auto it{block->end()};
|
auto it{block->end()};
|
||||||
while (it != block->begin()) {
|
while (it != block->begin()) {
|
||||||
--it;
|
--it;
|
||||||
|
|
|
@ -10,7 +10,7 @@ namespace Shader::Optimization {
|
||||||
|
|
||||||
void SsaRewritePass(IR::BlockList& program);
|
void SsaRewritePass(IR::BlockList& program);
|
||||||
void IdentityRemovalPass(IR::BlockList& program);
|
void IdentityRemovalPass(IR::BlockList& program);
|
||||||
void DeadCodeEliminationPass(IR::BlockList& program);
|
void DeadCodeEliminationPass(IR::Program& program);
|
||||||
void ConstantPropagationPass(IR::BlockList& program);
|
void ConstantPropagationPass(IR::BlockList& program);
|
||||||
void ResourceTrackingPass(IR::Program& program);
|
void ResourceTrackingPass(IR::Program& program);
|
||||||
void CollectShaderInfoPass(IR::Program& program);
|
void CollectShaderInfoPass(IR::Program& program);
|
||||||
|
|
|
@ -157,16 +157,16 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
||||||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
|
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
|
||||||
|
|
||||||
// Retrieve offset from base.
|
// Retrieve offset from base.
|
||||||
IR::Inst* addr = inst->Arg(0).InstRecursive();
|
const u32 dword_offset = inst->Arg(1).U32();
|
||||||
u32 dword_offset = addr->Arg(1).U32();
|
const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive();
|
||||||
addr = addr->Arg(0).InstRecursive();
|
|
||||||
ASSERT_MSG(addr->Arg(1).IsImmediate(), "Bindless not supported");
|
|
||||||
dword_offset += addr->Arg(1).U32() >> 2;
|
|
||||||
|
|
||||||
// Retrieve SGPR that holds sbase
|
// Retrieve SGPR pair that holds sbase
|
||||||
inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
|
const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
|
||||||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported");
|
const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
|
||||||
const IR::ScalarReg base = inst->Arg(0).ScalarReg();
|
ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
|
||||||
|
sbase1->GetOpcode() == IR::Opcode::GetUserData,
|
||||||
|
"Nested resource loads not supported");
|
||||||
|
const IR::ScalarReg base = sbase0->Arg(0).ScalarReg();
|
||||||
|
|
||||||
// Return retrieved location.
|
// Return retrieved location.
|
||||||
return SharpLocation{
|
return SharpLocation{
|
||||||
|
@ -186,7 +186,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
.stride = buffer.GetStride(),
|
.stride = buffer.GetStride(),
|
||||||
.num_records = u32(buffer.num_records),
|
.num_records = u32(buffer.num_records),
|
||||||
.used_types = BufferDataType(inst),
|
.used_types = BufferDataType(inst),
|
||||||
.is_storage = true || IsBufferStore(inst),
|
.is_storage = IsBufferStore(inst),
|
||||||
});
|
});
|
||||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
@ -206,8 +206,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
||||||
IR::U32 address = ir.Imm32(dword_offset);
|
IR::U32 address = ir.Imm32(dword_offset);
|
||||||
if (inst_info.index_enable && inst_info.offset_enable) {
|
if (inst_info.index_enable && inst_info.offset_enable) {
|
||||||
const IR::U32 offset{ir.CompositeExtract(inst.Arg(1), 0)};
|
const IR::U32 offset{ir.CompositeExtract(inst.Arg(1), 1)};
|
||||||
const IR::U32 index{ir.CompositeExtract(inst.Arg(1), 1)};
|
const IR::U32 index{ir.CompositeExtract(inst.Arg(1), 0)};
|
||||||
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
||||||
address = ir.IAdd(address, ir.ShiftRightLogical(offset, ir.Imm32(2)));
|
address = ir.IAdd(address, ir.ShiftRightLogical(offset, ir.Imm32(2)));
|
||||||
} else if (inst_info.index_enable) {
|
} else if (inst_info.index_enable) {
|
||||||
|
|
|
@ -219,7 +219,6 @@ using U64 = TypedValue<Type::U64>;
|
||||||
using F16 = TypedValue<Type::F16>;
|
using F16 = TypedValue<Type::F16>;
|
||||||
using F32 = TypedValue<Type::F32>;
|
using F32 = TypedValue<Type::F32>;
|
||||||
using F64 = TypedValue<Type::F64>;
|
using F64 = TypedValue<Type::F64>;
|
||||||
using U1U32F32 = TypedValue<Type::U1 | Type::U32 | Type::F32>;
|
|
||||||
using U32F32 = TypedValue<Type::U32 | Type::F32>;
|
using U32F32 = TypedValue<Type::U32 | Type::F32>;
|
||||||
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
||||||
using F32F64 = TypedValue<Type::F32 | Type::F64>;
|
using F32F64 = TypedValue<Type::F32 | Type::F64>;
|
||||||
|
|
|
@ -58,7 +58,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
Shader::Optimization::ResourceTrackingPass(program);
|
Shader::Optimization::ResourceTrackingPass(program);
|
||||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||||
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
|
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||||
Shader::Optimization::CollectShaderInfoPass(program);
|
Shader::Optimization::CollectShaderInfoPass(program);
|
||||||
|
|
||||||
fmt::print("Post passes\n\n{}\n", Shader::IR::DumpProgram(program));
|
fmt::print("Post passes\n\n{}\n", Shader::IR::DumpProgram(program));
|
||||||
|
|
|
@ -252,6 +252,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::DrawIndexOffset2: {
|
||||||
|
const auto* draw_index_off = reinterpret_cast<const PM4CmdDrawIndexOffset2*>(header);
|
||||||
|
regs.max_index_size = draw_index_off->max_size;
|
||||||
|
regs.num_indices = draw_index_off->index_count;
|
||||||
|
regs.draw_initiator = draw_index_off->draw_initiator;
|
||||||
|
if (rasterizer) {
|
||||||
|
rasterizer->Draw(true, draw_index_off->index_offset);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::DrawIndexAuto: {
|
case PM4ItOpcode::DrawIndexAuto: {
|
||||||
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
|
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
|
||||||
regs.num_indices = draw_index->index_count;
|
regs.num_indices = draw_index->index_count;
|
||||||
|
@ -272,6 +282,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::NumInstances: {
|
||||||
|
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
|
||||||
|
regs.num_instances.num_instances = num_instances->num_instances;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PM4ItOpcode::IndexBase: {
|
||||||
|
const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
|
||||||
|
regs.index_base_address.base_addr_lo = index_base->addr_lo;
|
||||||
|
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::EventWrite: {
|
case PM4ItOpcode::EventWrite: {
|
||||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -548,4 +548,15 @@ struct PM4CmdDispatchDirect {
|
||||||
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PM4CmdDrawNumInstances {
|
||||||
|
PM4Type3Header header;
|
||||||
|
u32 num_instances;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PM4CmdDrawIndexBase {
|
||||||
|
PM4Type3Header header;
|
||||||
|
u32 addr_lo;
|
||||||
|
u32 addr_hi;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
|
@ -14,6 +14,8 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op) {
|
||||||
return vk::StencilOp::eKeep;
|
return vk::StencilOp::eKeep;
|
||||||
case Liverpool::StencilFunc::Zero:
|
case Liverpool::StencilFunc::Zero:
|
||||||
return vk::StencilOp::eZero;
|
return vk::StencilOp::eZero;
|
||||||
|
case Liverpool::StencilFunc::ReplaceTest:
|
||||||
|
return vk::StencilOp::eReplace;
|
||||||
case Liverpool::StencilFunc::AddClamp:
|
case Liverpool::StencilFunc::AddClamp:
|
||||||
return vk::StencilOp::eIncrementAndClamp;
|
return vk::StencilOp::eIncrementAndClamp;
|
||||||
case Liverpool::StencilFunc::SubClamp:
|
case Liverpool::StencilFunc::SubClamp:
|
||||||
|
@ -307,6 +309,13 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Srgb) {
|
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Srgb) {
|
||||||
return vk::Format::eBc3SrgbBlock;
|
return vk::Format::eBc3SrgbBlock;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Sint) {
|
||||||
|
return vk::Format::eR16G16B16A16Sint;
|
||||||
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) {
|
||||||
|
return vk::Format::eBc7SrgbBlock;
|
||||||
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -81,8 +81,17 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||||
|
|
||||||
ComputePipeline::~ComputePipeline() = default;
|
ComputePipeline::~ComputePipeline() = default;
|
||||||
|
|
||||||
void ComputePipeline::BindResources(Core::MemoryManager* memory,
|
void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||||
VideoCore::TextureCache& texture_cache) const {
|
VideoCore::TextureCache& texture_cache) const {
|
||||||
|
static constexpr u64 MinUniformAlignment = 64;
|
||||||
|
|
||||||
|
const auto map_staging = [&](auto src, size_t size) {
|
||||||
|
const auto [data, offset, _] = staging.Map(size, MinUniformAlignment);
|
||||||
|
std::memcpy(data, reinterpret_cast<const void*>(src), size);
|
||||||
|
staging.Commit(size);
|
||||||
|
return offset;
|
||||||
|
};
|
||||||
|
|
||||||
// Bind resource buffers and textures.
|
// Bind resource buffers and textures.
|
||||||
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
|
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
|
||||||
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
|
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
|
||||||
|
@ -94,8 +103,9 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory,
|
||||||
const u32 size = vsharp.GetSize();
|
const u32 size = vsharp.GetSize();
|
||||||
const VAddr addr = vsharp.base_address.Value();
|
const VAddr addr = vsharp.base_address.Value();
|
||||||
texture_cache.OnCpuWrite(addr);
|
texture_cache.OnCpuWrite(addr);
|
||||||
const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
|
const u32 offset = map_staging(addr, size);
|
||||||
buffer_infos.emplace_back(vk_buffer, offset, size);
|
// const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
|
||||||
|
buffer_infos.emplace_back(staging.Handle(), offset, size);
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
.dstSet = VK_NULL_HANDLE,
|
.dstSet = VK_NULL_HANDLE,
|
||||||
.dstBinding = binding++,
|
.dstBinding = binding++,
|
||||||
|
|
|
@ -31,7 +31,8 @@ public:
|
||||||
return *pipeline;
|
return *pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BindResources(Core::MemoryManager* memory, VideoCore::TextureCache& texture_cache) const;
|
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||||
|
VideoCore::TextureCache& texture_cache) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Instance& instance;
|
const Instance& instance;
|
||||||
|
|
|
@ -32,10 +32,10 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||||
|
|
||||||
Rasterizer::~Rasterizer() = default;
|
Rasterizer::~Rasterizer() = default;
|
||||||
|
|
||||||
void Rasterizer::Draw(bool is_indexed) {
|
void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
const u32 num_indices = SetupIndexBuffer(is_indexed);
|
const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset);
|
||||||
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
|
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
|
||||||
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||||
|
|
||||||
|
@ -85,17 +85,16 @@ void Rasterizer::Draw(bool is_indexed) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::DispatchDirect() {
|
void Rasterizer::DispatchDirect() {
|
||||||
return;
|
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
const auto& cs_program = liverpool->regs.cs_program;
|
const auto& cs_program = liverpool->regs.cs_program;
|
||||||
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
||||||
pipeline->BindResources(memory, texture_cache);
|
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||||
|
|
||||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
||||||
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
|
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
|
||||||
// Emulate QuadList primitive type with CPU made index buffer.
|
// Emulate QuadList primitive type with CPU made index buffer.
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
|
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
|
||||||
|
@ -131,7 +130,8 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
|
||||||
|
|
||||||
// Bind index buffer.
|
// Bind index buffer.
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, index_type);
|
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset + index_offset * index_size,
|
||||||
|
index_type);
|
||||||
return regs.num_indices;
|
return regs.num_indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,12 +29,12 @@ public:
|
||||||
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
|
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
|
||||||
~Rasterizer();
|
~Rasterizer();
|
||||||
|
|
||||||
void Draw(bool is_indexed);
|
void Draw(bool is_indexed, u32 index_offset = 0);
|
||||||
|
|
||||||
void DispatchDirect();
|
void DispatchDirect();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
u32 SetupIndexBuffer(bool& is_indexed);
|
u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
|
||||||
void MapMemory(VAddr addr, size_t size);
|
void MapMemory(VAddr addr, size_t size);
|
||||||
|
|
||||||
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
||||||
|
|
|
@ -116,7 +116,7 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
|
||||||
std::unique_lock lock{m_page_table};
|
std::unique_lock lock{m_page_table};
|
||||||
boost::container::small_vector<ImageId, 2> image_ids;
|
boost::container::small_vector<ImageId, 2> image_ids;
|
||||||
ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
|
ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
|
||||||
if (image.cpu_addr == cpu_address) {
|
if (image.cpu_addr == cpu_address && image.info.size.width == info.size.width) {
|
||||||
image_ids.push_back(image_id);
|
image_ids.push_back(image_id);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -216,21 +216,13 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vk::ImageSubresourceRange range = {
|
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
|
||||||
.baseMipLevel = 0,
|
|
||||||
.levelCount = 1,
|
|
||||||
.baseArrayLayer = 0,
|
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
|
||||||
};
|
|
||||||
|
|
||||||
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
||||||
for (u32 l = 0; l < image.info.resources.layers; l++) {
|
|
||||||
// Upload data to the staging buffer.
|
|
||||||
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
||||||
const u32 width = image.info.size.width >> m;
|
const u32 width = image.info.size.width >> m;
|
||||||
const u32 height = image.info.size.height >> m;
|
const u32 height = image.info.size.height >> m;
|
||||||
const u32 map_size = width * height;
|
const u32 map_size = width * height * image.info.resources.layers;
|
||||||
|
|
||||||
|
// Upload data to the staging buffer.
|
||||||
const auto [data, offset, _] = staging.Map(map_size, 16);
|
const auto [data, offset, _] = staging.Map(map_size, 16);
|
||||||
if (image.info.is_tiled) {
|
if (image.info.is_tiled) {
|
||||||
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
|
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
|
||||||
|
@ -248,8 +240,8 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
.imageSubresource{
|
.imageSubresource{
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||||
.mipLevel = m,
|
.mipLevel = m,
|
||||||
.baseArrayLayer = l,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = 1,
|
.layerCount = u32(image.info.resources.layers),
|
||||||
},
|
},
|
||||||
.imageOffset = {0, 0, 0},
|
.imageOffset = {0, 0, 0},
|
||||||
.imageExtent = {width, height, 1},
|
.imageExtent = {width, height, 1},
|
||||||
|
@ -265,7 +257,6 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
||||||
const u64 hash = XXH3_64bits(&sampler, sizeof(sampler));
|
const u64 hash = XXH3_64bits(&sampler, sizeof(sampler));
|
||||||
|
|
Loading…
Reference in New Issue