control_flow_graph: Initial divergence handling
This commit is contained in:
parent
9adc638220
commit
ca674b4ea9
|
@ -10,7 +10,7 @@
|
|||
#include <arpa/inet.h>
|
||||
#endif
|
||||
|
||||
#include <common/assert.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/libs.h"
|
||||
|
|
|
@ -35,15 +35,22 @@ static IR::Condition MakeCondition(Opcode opcode) {
|
|||
return IR::Condition::Execz;
|
||||
case Opcode::S_CBRANCH_EXECNZ:
|
||||
return IR::Condition::Execnz;
|
||||
case Opcode::S_AND_SAVEEXEC_B64:
|
||||
case Opcode::S_ANDN2_B64:
|
||||
return IR::Condition::Execnz;
|
||||
default:
|
||||
return IR::Condition::True;
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr size_t LabelReserveSize = 32;
|
||||
|
||||
/// Builds the control flow graph for an instruction list.
/// The work is done in four ordered passes: branch labels, divergence labels,
/// block creation and block linking.
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
    : block_pool(block_pool_), inst_list(inst_list_) {
    // One slot per instruction plus one extra entry
    // (presumably the address just past the final instruction -- TODO confirm).
    const size_t num_instructions = inst_list.size();
    index_to_pc.resize(num_instructions + 1);

    // Pre-size the label storage before any pass starts appending to it.
    labels.reserve(LabelReserveSize);

    // The passes below depend on each other's output, keep this order.
    EmitLabels();
    EmitDivergenceLabels();
    EmitBlocks();
    LinkBlocks();
}
|
||||
|
@ -51,14 +58,7 @@ CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_l
|
|||
void CFG::EmitLabels() {
|
||||
// Always set a label at entry point.
|
||||
u32 pc = 0;
|
||||
labels.push_back(pc);
|
||||
|
||||
const auto add_label = [this](u32 address) {
|
||||
const auto it = std::ranges::find(labels, address);
|
||||
if (it == labels.end()) {
|
||||
labels.push_back(address);
|
||||
}
|
||||
};
|
||||
AddLabel(pc);
|
||||
|
||||
// Iterate instruction list and add labels to branch targets.
|
||||
for (u32 i = 0; i < inst_list.size(); i++) {
|
||||
|
@ -66,15 +66,15 @@ void CFG::EmitLabels() {
|
|||
const GcnInst inst = inst_list[i];
|
||||
if (inst.IsUnconditionalBranch()) {
|
||||
const u32 target = inst.BranchTarget(pc);
|
||||
add_label(target);
|
||||
AddLabel(target);
|
||||
} else if (inst.IsConditionalBranch()) {
|
||||
const u32 true_label = inst.BranchTarget(pc);
|
||||
const u32 false_label = pc + inst.length;
|
||||
add_label(true_label);
|
||||
add_label(false_label);
|
||||
AddLabel(true_label);
|
||||
AddLabel(false_label);
|
||||
} else if (inst.opcode == Opcode::S_ENDPGM) {
|
||||
const u32 next_label = pc + inst.length;
|
||||
add_label(next_label);
|
||||
AddLabel(next_label);
|
||||
}
|
||||
pc += inst.length;
|
||||
}
|
||||
|
@ -84,16 +84,76 @@ void CFG::EmitLabels() {
|
|||
std::ranges::sort(labels);
|
||||
}
|
||||
|
||||
void CFG::EmitBlocks() {
|
||||
const auto get_index = [this](Label label) -> size_t {
|
||||
if (label == 0) {
|
||||
return 0ULL;
|
||||
}
|
||||
const auto it_index = std::ranges::lower_bound(index_to_pc, label);
|
||||
ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back());
|
||||
return std::distance(index_to_pc.begin(), it_index);
|
||||
void CFG::EmitDivergenceLabels() {
|
||||
const auto is_open_scope = [](const GcnInst& inst) {
|
||||
// An open scope instruction is an instruction that modifies EXEC
|
||||
// but also saves the previous value to restore later. This indicates
|
||||
// we are entering a scope.
|
||||
return inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
|
||||
// While this instruction does not save EXEC it is often used paired
|
||||
// with SAVEEXEC to mask the threads that didn't pass the condition
|
||||
// of initial branch.
|
||||
inst.opcode == Opcode::S_ANDN2_B64;
|
||||
};
|
||||
const auto is_close_scope = [](const GcnInst& inst) {
|
||||
// Closing an EXEC scope can be either a branch instruction
|
||||
// (typical case when S_AND_SAVEEXEC_B64 is right before a branch)
|
||||
// or by a move instruction to EXEC that restores the backup.
|
||||
return (inst.opcode == Opcode::S_MOV_B64 && inst.dst[0].field == OperandField::ExecLo) ||
|
||||
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
|
||||
// Those instructions need to be wrapped in the condition as well so allow branch
|
||||
// as end scope instruction.
|
||||
inst.opcode == Opcode::S_CBRANCH_EXECZ;
|
||||
};
|
||||
|
||||
// Since we will be adding new labels, avoid iterating those as well.
|
||||
const size_t end_size = labels.size();
|
||||
for (u32 l = 0; l < end_size; l++) {
|
||||
const Label start = labels[l];
|
||||
// Stop if we reached end of existing labels.
|
||||
if (l == end_size - 1) {
|
||||
break;
|
||||
}
|
||||
const Label end = labels[l + 1];
|
||||
const size_t end_index = GetIndex(end);
|
||||
|
||||
s32 curr_begin = -1;
|
||||
for (size_t index = GetIndex(start); index < end_index; index++) {
|
||||
const auto& inst = inst_list[index];
|
||||
// Mark a potential start of an exec scope.
|
||||
if (is_open_scope(inst)) {
|
||||
curr_begin = index;
|
||||
continue;
|
||||
}
|
||||
if (is_close_scope(inst) && curr_begin != -1) {
|
||||
// If there are no instructions inside scope don't do anything.
|
||||
if (index - curr_begin == 1) {
|
||||
curr_begin = -1;
|
||||
continue;
|
||||
}
|
||||
// Ensure the register holding EXEC is the same as the one saved.
|
||||
const u32 backup_sreg = inst_list[curr_begin].dst[0].code;
|
||||
const u32 restore_sreg = inst.src[0].code;
|
||||
if (inst.opcode == Opcode::S_MOV_B64 && backup_sreg != restore_sreg) {
|
||||
continue;
|
||||
}
|
||||
// Add a label to the instruction right after the open scope call.
|
||||
// It is the start of a new basic block.
|
||||
const auto& save_inst = inst_list[curr_begin];
|
||||
const Label label = index_to_pc[curr_begin] + save_inst.length;
|
||||
AddLabel(label);
|
||||
// Add a label to the close scope instruction as well.
|
||||
AddLabel(index_to_pc[index]);
|
||||
curr_begin = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort labels to make sure block insertion is correct.
|
||||
std::ranges::sort(labels);
|
||||
}
|
||||
|
||||
void CFG::EmitBlocks() {
|
||||
for (auto it = labels.begin(); it != labels.end(); it++) {
|
||||
const Label start = *it;
|
||||
const auto next_it = std::next(it);
|
||||
|
@ -102,8 +162,10 @@ void CFG::EmitBlocks() {
|
|||
// Last label is special.
|
||||
return;
|
||||
}
|
||||
// The end label is the start instruction of next block.
|
||||
// The end instruction of this block is the previous one.
|
||||
const Label end = *next_it;
|
||||
const size_t end_index = get_index(end) - 1;
|
||||
const size_t end_index = GetIndex(end) - 1;
|
||||
const auto& end_inst = inst_list[end_index];
|
||||
|
||||
// Insert block between the labels using the last instruction
|
||||
|
@ -111,7 +173,7 @@ void CFG::EmitBlocks() {
|
|||
Block* block = block_pool.Create();
|
||||
block->begin = start;
|
||||
block->end = end;
|
||||
block->begin_index = get_index(start);
|
||||
block->begin_index = GetIndex(start);
|
||||
block->end_index = end_index;
|
||||
block->end_inst = end_inst;
|
||||
block->cond = MakeCondition(end_inst.opcode);
|
||||
|
@ -126,8 +188,26 @@ void CFG::LinkBlocks() {
|
|||
return &*it;
|
||||
};
|
||||
|
||||
for (auto& block : blocks) {
|
||||
for (auto it = blocks.begin(); it != blocks.end(); it++) {
|
||||
auto& block = *it;
|
||||
const auto end_inst{block.end_inst};
|
||||
// Handle divergence block inserted here.
|
||||
if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
|
||||
end_inst.opcode == Opcode::S_ANDN2_B64) {
|
||||
// Blocks are stored ordered by address in the set
|
||||
auto next_it = std::next(it);
|
||||
auto* target_block = &(*next_it);
|
||||
++target_block->num_predecessors;
|
||||
block.branch_true = target_block;
|
||||
|
||||
auto merge_it = std::next(next_it);
|
||||
auto* merge_block = &(*merge_it);
|
||||
++merge_block->num_predecessors;
|
||||
block.branch_false = merge_block;
|
||||
block.end_class = EndClass::Branch;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the block doesn't end with a branch we simply
|
||||
// need to link with the next block.
|
||||
if (!end_inst.IsTerminateInstruction()) {
|
||||
|
|
|
@ -3,11 +3,13 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/intrusive/set.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/object_pool.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/frontend/instruction.h"
|
||||
|
@ -55,9 +57,26 @@ public:
|
|||
|
||||
private:
|
||||
void EmitLabels();
|
||||
void EmitDivergenceLabels();
|
||||
void EmitBlocks();
|
||||
void LinkBlocks();
|
||||
|
||||
/// Records `address` in `labels`, skipping it when an equal entry already exists.
/// The lookup is a linear search, so `labels` does not need to be sorted here.
void AddLabel(Label address) {
    const bool already_present = std::ranges::find(labels, address) != labels.end();
    if (already_present) {
        return;
    }
    labels.push_back(address);
}
|
||||
|
||||
/// Translates a label into a position within `index_to_pc`: the position of the first
/// entry that is not less than `label`. Label 0 short-circuits to index 0.
size_t GetIndex(Label label) {
    if (label != 0) {
        const auto first = index_to_pc.begin();
        const auto pos = std::ranges::lower_bound(index_to_pc, label);
        // NOTE(review): if index_to_pc is sorted, lower_bound only returns end() when
        // label is greater than every entry, so this condition looks always-true -- confirm
        // whether a stricter check was intended.
        ASSERT(pos != index_to_pc.end() || label > index_to_pc.back());
        return std::distance(first, pos);
    }
    return 0ULL;
}
|
||||
|
||||
public:
|
||||
Common::ObjectPool<Block>& block_pool;
|
||||
std::span<const GcnInst> inst_list;
|
||||
|
|
|
@ -436,7 +436,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
.dword_offset = sharp.dword_offset,
|
||||
.length = BufferLength(buffer),
|
||||
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
|
||||
.is_storage = is_store || buffer.GetSize() > MaxUboSize,
|
||||
.is_storage = true || is_store || buffer.GetSize() > MaxUboSize,
|
||||
.is_written = is_store,
|
||||
});
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
namespace VideoCore {
|
||||
|
||||
static constexpr size_t StagingBufferSize = 256_MB;
|
||||
static constexpr size_t StagingBufferSize = 512_MB;
|
||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||
|
||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
|
|
|
@ -114,7 +114,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
}
|
||||
}
|
||||
const u32 size = vsharp.GetSize();
|
||||
if (buffer.is_written) {
|
||||
if (buffer.is_written && compute_key != 0xe991ee280187cbc) {
|
||||
texture_cache.InvalidateMemory(address, size, true);
|
||||
}
|
||||
const u32 alignment =
|
||||
|
|
|
@ -322,6 +322,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
||||
info.pgm_hash = compute_key;
|
||||
auto program =
|
||||
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
|
||||
|
||||
|
|
Loading…
Reference in New Issue