shader: Fix block processing order in dead code elimination pass
This commit is contained in:
parent
705d326a6d
commit
5aa3a4d4a0
|
@ -41,6 +41,7 @@ struct Block : Hook {
|
||||||
EndClass end_class{};
|
EndClass end_class{};
|
||||||
Block* branch_true{};
|
Block* branch_true{};
|
||||||
Block* branch_false{};
|
Block* branch_false{};
|
||||||
|
bool is_dummy{};
|
||||||
};
|
};
|
||||||
|
|
||||||
class CFG {
|
class CFG {
|
||||||
|
|
|
@ -630,9 +630,11 @@ private:
|
||||||
break;
|
break;
|
||||||
case StatementType::Code: {
|
case StatementType::Code: {
|
||||||
ensure_block();
|
ensure_block();
|
||||||
|
if (!stmt.block->is_dummy) {
|
||||||
const u32 start = stmt.block->begin_index;
|
const u32 start = stmt.block->begin_index;
|
||||||
const u32 size = stmt.block->end_index - start + 1;
|
const u32 size = stmt.block->end_index - start + 1;
|
||||||
Translate(current_block, inst_list.subspan(start, size), info);
|
Translate(current_block, inst_list.subspan(start, size), info);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case StatementType::SetVariable: {
|
case StatementType::SetVariable: {
|
||||||
|
@ -808,7 +810,7 @@ private:
|
||||||
ObjectPool<IR::Inst>& inst_pool;
|
ObjectPool<IR::Inst>& inst_pool;
|
||||||
ObjectPool<IR::Block>& block_pool;
|
ObjectPool<IR::Block>& block_pool;
|
||||||
IR::AbstractSyntaxList& syntax_list;
|
IR::AbstractSyntaxList& syntax_list;
|
||||||
const Block dummy_flow_block{};
|
const Block dummy_flow_block{.is_dummy = true};
|
||||||
std::span<const GcnInst> inst_list;
|
std::span<const GcnInst> inst_list;
|
||||||
Info& info;
|
Info& info;
|
||||||
};
|
};
|
||||||
|
|
|
@ -104,18 +104,21 @@ void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||||
if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) {
|
if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const IR::U1 src0{GetSrc(inst.src[0])};
|
|
||||||
if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) {
|
if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) {
|
||||||
// Exec context push
|
// Exec context push
|
||||||
exec_contexts[inst.dst[0].code] = true;
|
exec_contexts[inst.dst[0].code] = true;
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), ir.GetExec());
|
||||||
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
||||||
inst.src[0].field == OperandField::ScalarGPR) {
|
inst.src[0].field == OperandField::ScalarGPR) {
|
||||||
// Exec context pop
|
// Exec context pop
|
||||||
exec_contexts[inst.src[0].code] = false;
|
exec_contexts[inst.src[0].code] = false;
|
||||||
} else if (inst.src[0].field != OperandField::ConstZero) {
|
ir.SetExec(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)));
|
||||||
|
} else if (inst.dst[0].field == OperandField::ExecLo &&
|
||||||
|
inst.src[0].field == OperandField::ConstZero) {
|
||||||
|
ir.SetExec(ir.Imm1(false));
|
||||||
|
} else {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
SetDst(inst.dst[0], src0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
||||||
|
|
|
@ -58,16 +58,13 @@ void Translator::EmitPrologue() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||||
// Input modifiers work on float values.
|
// Input modifiers work on float values.
|
||||||
force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;
|
force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;
|
||||||
|
|
||||||
IR::U1U32F32 value{};
|
IR::U32F32 value{};
|
||||||
switch (operand.field) {
|
switch (operand.field) {
|
||||||
case OperandField::ScalarGPR:
|
case OperandField::ScalarGPR:
|
||||||
if (exec_contexts[operand.code]) {
|
|
||||||
value = ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
|
||||||
}
|
|
||||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||||
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||||
} else {
|
} else {
|
||||||
|
@ -124,9 +121,6 @@ IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||||
case OperandField::ConstFloatNeg_2_0:
|
case OperandField::ConstFloatNeg_2_0:
|
||||||
value = ir.Imm32(-2.0f);
|
value = ir.Imm32(-2.0f);
|
||||||
break;
|
break;
|
||||||
case OperandField::ExecLo:
|
|
||||||
value = ir.GetExec();
|
|
||||||
break;
|
|
||||||
case OperandField::VccLo:
|
case OperandField::VccLo:
|
||||||
if (force_flt) {
|
if (force_flt) {
|
||||||
value = ir.BitCast<IR::F32>(ir.GetVccLo());
|
value = ir.BitCast<IR::F32>(ir.GetVccLo());
|
||||||
|
@ -150,8 +144,8 @@ IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) {
|
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
||||||
IR::U1U32F32 result = value;
|
IR::U32F32 result = value;
|
||||||
if (operand.output_modifier.multiplier != 0.f) {
|
if (operand.output_modifier.multiplier != 0.f) {
|
||||||
result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
|
result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
|
||||||
}
|
}
|
||||||
|
@ -160,14 +154,9 @@ void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) {
|
||||||
}
|
}
|
||||||
switch (operand.field) {
|
switch (operand.field) {
|
||||||
case OperandField::ScalarGPR:
|
case OperandField::ScalarGPR:
|
||||||
if (value.Type() == IR::Type::U1) {
|
|
||||||
return ir.SetThreadBitScalarReg(IR::ScalarReg(operand.code), result);
|
|
||||||
}
|
|
||||||
return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
|
return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
|
||||||
case OperandField::VectorGPR:
|
case OperandField::VectorGPR:
|
||||||
return ir.SetVectorReg(IR::VectorReg(operand.code), result);
|
return ir.SetVectorReg(IR::VectorReg(operand.code), result);
|
||||||
case OperandField::ExecLo:
|
|
||||||
return ir.SetExec(result);
|
|
||||||
case OperandField::VccLo:
|
case OperandField::VccLo:
|
||||||
return ir.SetVccLo(result);
|
return ir.SetVccLo(result);
|
||||||
case OperandField::VccHi:
|
case OperandField::VccHi:
|
||||||
|
|
|
@ -124,8 +124,8 @@ public:
|
||||||
void EXP(const GcnInst& inst);
|
void EXP(const GcnInst& inst);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IR::U1U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
|
IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
|
||||||
void SetDst(const InstOperand& operand, const IR::U1U32F32& value);
|
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IR::IREmitter ir;
|
IR::IREmitter ir;
|
||||||
|
|
|
@ -5,10 +5,10 @@
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
void DeadCodeEliminationPass(IR::BlockList& program) {
|
void DeadCodeEliminationPass(IR::Program& program) {
|
||||||
// We iterate over the instructions in reverse order.
|
// We iterate over the instructions in reverse order.
|
||||||
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
||||||
for (IR::Block* const block : program) {
|
for (IR::Block* const block : program.post_order_blocks) {
|
||||||
auto it{block->end()};
|
auto it{block->end()};
|
||||||
while (it != block->begin()) {
|
while (it != block->begin()) {
|
||||||
--it;
|
--it;
|
||||||
|
|
|
@ -10,7 +10,7 @@ namespace Shader::Optimization {
|
||||||
|
|
||||||
void SsaRewritePass(IR::BlockList& program);
|
void SsaRewritePass(IR::BlockList& program);
|
||||||
void IdentityRemovalPass(IR::BlockList& program);
|
void IdentityRemovalPass(IR::BlockList& program);
|
||||||
void DeadCodeEliminationPass(IR::BlockList& program);
|
void DeadCodeEliminationPass(IR::Program& program);
|
||||||
void ConstantPropagationPass(IR::BlockList& program);
|
void ConstantPropagationPass(IR::BlockList& program);
|
||||||
void ResourceTrackingPass(IR::Program& program);
|
void ResourceTrackingPass(IR::Program& program);
|
||||||
void CollectShaderInfoPass(IR::Program& program);
|
void CollectShaderInfoPass(IR::Program& program);
|
||||||
|
|
|
@ -219,7 +219,6 @@ using U64 = TypedValue<Type::U64>;
|
||||||
using F16 = TypedValue<Type::F16>;
|
using F16 = TypedValue<Type::F16>;
|
||||||
using F32 = TypedValue<Type::F32>;
|
using F32 = TypedValue<Type::F32>;
|
||||||
using F64 = TypedValue<Type::F64>;
|
using F64 = TypedValue<Type::F64>;
|
||||||
using U1U32F32 = TypedValue<Type::U1 | Type::U32 | Type::F32>;
|
|
||||||
using U32F32 = TypedValue<Type::U32 | Type::F32>;
|
using U32F32 = TypedValue<Type::U32 | Type::F32>;
|
||||||
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
||||||
using F32F64 = TypedValue<Type::F32 | Type::F64>;
|
using F32F64 = TypedValue<Type::F32 | Type::F64>;
|
||||||
|
|
|
@ -58,7 +58,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
Shader::Optimization::ResourceTrackingPass(program);
|
Shader::Optimization::ResourceTrackingPass(program);
|
||||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||||
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
|
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||||
Shader::Optimization::CollectShaderInfoPass(program);
|
Shader::Optimization::CollectShaderInfoPass(program);
|
||||||
|
|
||||||
fmt::print("Post passes\n\n{}\n", Shader::IR::DumpProgram(program));
|
fmt::print("Post passes\n\n{}\n", Shader::IR::DumpProgram(program));
|
||||||
|
|
|
@ -216,21 +216,13 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vk::ImageSubresourceRange range = {
|
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
|
||||||
.baseMipLevel = 0,
|
|
||||||
.levelCount = 1,
|
|
||||||
.baseArrayLayer = 0,
|
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
|
||||||
};
|
|
||||||
|
|
||||||
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
||||||
for (u32 l = 0; l < image.info.resources.layers; l++) {
|
|
||||||
// Upload data to the staging buffer.
|
|
||||||
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
||||||
const u32 width = image.info.size.width >> m;
|
const u32 width = image.info.size.width >> m;
|
||||||
const u32 height = image.info.size.height >> m;
|
const u32 height = image.info.size.height >> m;
|
||||||
const u32 map_size = width * height;
|
const u32 map_size = width * height * image.info.resources.layers;
|
||||||
|
|
||||||
|
// Upload data to the staging buffer.
|
||||||
const auto [data, offset, _] = staging.Map(map_size, 16);
|
const auto [data, offset, _] = staging.Map(map_size, 16);
|
||||||
if (image.info.is_tiled) {
|
if (image.info.is_tiled) {
|
||||||
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
|
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
|
||||||
|
@ -248,8 +240,8 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
.imageSubresource{
|
.imageSubresource{
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||||
.mipLevel = m,
|
.mipLevel = m,
|
||||||
.baseArrayLayer = l,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = 1,
|
.layerCount = u32(image.info.resources.layers),
|
||||||
},
|
},
|
||||||
.imageOffset = {0, 0, 0},
|
.imageOffset = {0, 0, 0},
|
||||||
.imageExtent = {width, height, 1},
|
.imageExtent = {width, height, 1},
|
||||||
|
@ -265,7 +257,6 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
||||||
const u64 hash = XXH3_64bits(&sampler, sizeof(sampler));
|
const u64 hash = XXH3_64bits(&sampler, sizeof(sampler));
|
||||||
|
|
Loading…
Reference in New Issue