diff --git a/src/core/libraries/network/net.cpp b/src/core/libraries/network/net.cpp index 958f9264..2c03dde3 100644 --- a/src/core/libraries/network/net.cpp +++ b/src/core/libraries/network/net.cpp @@ -10,7 +10,7 @@ #include #endif -#include +#include "common/assert.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 2925c05d..0fec0c2f 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -35,15 +35,22 @@ static IR::Condition MakeCondition(Opcode opcode) { return IR::Condition::Execz; case Opcode::S_CBRANCH_EXECNZ: return IR::Condition::Execnz; + case Opcode::S_AND_SAVEEXEC_B64: + case Opcode::S_ANDN2_B64: + return IR::Condition::Execnz; default: return IR::Condition::True; } } +static constexpr size_t LabelReserveSize = 32; + CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) : block_pool{block_pool_}, inst_list{inst_list_} { index_to_pc.resize(inst_list.size() + 1); + labels.reserve(LabelReserveSize); EmitLabels(); + EmitDivergenceLabels(); EmitBlocks(); LinkBlocks(); } @@ -51,14 +58,7 @@ CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_l void CFG::EmitLabels() { // Always set a label at entry point. u32 pc = 0; - labels.push_back(pc); - - const auto add_label = [this](u32 address) { - const auto it = std::ranges::find(labels, address); - if (it == labels.end()) { - labels.push_back(address); - } - }; + AddLabel(pc); // Iterate instruction list and add labels to branch targets. for (u32 i = 0; i < inst_list.size(); i++) { @@ -66,15 +66,15 @@ void CFG::EmitLabels() { const GcnInst inst = inst_list[i]; if (inst.IsUnconditionalBranch()) { const u32 target = inst.BranchTarget(pc); - add_label(target); + AddLabel(target); } else if (inst.IsConditionalBranch()) { const u32 true_label = inst.BranchTarget(pc); const u32 false_label = pc + inst.length; - add_label(true_label); - add_label(false_label); + AddLabel(true_label); + AddLabel(false_label); } else if (inst.opcode == Opcode::S_ENDPGM) { const u32 next_label = pc + inst.length; - add_label(next_label); + AddLabel(next_label); } pc += inst.length; } @@ -84,16 +84,76 @@ void CFG::EmitLabels() { std::ranges::sort(labels); } -void CFG::EmitBlocks() { - const auto get_index = [this](Label label) -> size_t { - if (label == 0) { - return 0ULL; - } - const auto it_index = std::ranges::lower_bound(index_to_pc, label); - ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back()); - return std::distance(index_to_pc.begin(), it_index); +void CFG::EmitDivergenceLabels() { + const auto is_open_scope = [](const GcnInst& inst) { + // An open scope instruction is an instruction that modifies EXEC + // but also saves the previous value to restore later. This indicates + // we are entering a scope. + return inst.opcode == Opcode::S_AND_SAVEEXEC_B64 || + // While this instruction does not save EXEC it is often used paired + // with SAVEEXEC to mask the threads that didn't pass the condition + // of initial branch. + inst.opcode == Opcode::S_ANDN2_B64; + }; + const auto is_close_scope = [](const GcnInst& inst) { + // Closing an EXEC scope can be either a branch instruction + // (typical case when S_AND_SAVEEXEC_B64 is right before a branch) + // or by a move instruction to EXEC that restores the backup. + return (inst.opcode == Opcode::S_MOV_B64 && inst.dst[0].field == OperandField::ExecLo) || + // Sometimes compiler might insert instructions between the SAVEEXEC and the branch. + // Those instructions need to be wrapped in the condition as well so allow branch + // as end scope instruction. + inst.opcode == Opcode::S_CBRANCH_EXECZ; }; + // Since we will be adding new labels, avoid iterating those as well. + const size_t end_size = labels.size(); + for (u32 l = 0; l < end_size; l++) { + const Label start = labels[l]; + // Stop if we reached end of existing labels. + if (l == end_size - 1) { + break; + } + const Label end = labels[l + 1]; + const size_t end_index = GetIndex(end); + + s32 curr_begin = -1; + for (size_t index = GetIndex(start); index < end_index; index++) { + const auto& inst = inst_list[index]; + // Mark a potential start of an exec scope. + if (is_open_scope(inst)) { + curr_begin = index; + continue; + } + if (is_close_scope(inst) && curr_begin != -1) { + // If there are no instructions inside scope don't do anything. + if (index - curr_begin == 1) { + curr_begin = -1; + continue; + } + // Ensure the register holding EXEC is the same as the one saved. + const u32 backup_sreg = inst_list[curr_begin].dst[0].code; + const u32 restore_sreg = inst.src[0].code; + if (inst.opcode == Opcode::S_MOV_B64 && backup_sreg != restore_sreg) { + continue; + } + // Add a label to the instruction right after the open scope call. + // It is the start of a new basic block. + const auto& save_inst = inst_list[curr_begin]; + const Label label = index_to_pc[curr_begin] + save_inst.length; + AddLabel(label); + // Add a label to the close scope instruction as well. + AddLabel(index_to_pc[index]); + curr_begin = -1; + } + } + } + + // Sort labels to make sure block insertion is correct. + std::ranges::sort(labels); +} + +void CFG::EmitBlocks() { for (auto it = labels.begin(); it != labels.end(); it++) { const Label start = *it; const auto next_it = std::next(it); @@ -102,8 +162,10 @@ void CFG::EmitBlocks() { // Last label is special. return; } + // The end label is the start instruction of next block. + // The end instruction of this block is the previous one. const Label end = *next_it; - const size_t end_index = get_index(end) - 1; + const size_t end_index = GetIndex(end) - 1; const auto& end_inst = inst_list[end_index]; // Insert block between the labels using the last instruction @@ -111,7 +173,7 @@ void CFG::EmitBlocks() { Block* block = block_pool.Create(); block->begin = start; block->end = end; - block->begin_index = get_index(start); + block->begin_index = GetIndex(start); block->end_index = end_index; block->end_inst = end_inst; block->cond = MakeCondition(end_inst.opcode); @@ -126,8 +188,26 @@ void CFG::LinkBlocks() { return &*it; }; - for (auto& block : blocks) { + for (auto it = blocks.begin(); it != blocks.end(); it++) { + auto& block = *it; const auto end_inst{block.end_inst}; + // Handle divergence block inserted here. + if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 || + end_inst.opcode == Opcode::S_ANDN2_B64) { + // Blocks are stored ordered by address in the set + auto next_it = std::next(it); + auto* target_block = &(*next_it); + ++target_block->num_predecessors; + block.branch_true = target_block; + + auto merge_it = std::next(next_it); + auto* merge_block = &(*merge_it); + ++merge_block->num_predecessors; + block.branch_false = merge_block; + block.end_class = EndClass::Branch; + continue; + } + // If the block doesn't end with a branch we simply // need to link with the next block. if (!end_inst.IsTerminateInstruction()) { diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index ebe614ee..d98d4b05 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -3,11 +3,13 @@ #pragma once +#include #include #include #include #include +#include "common/assert.h" #include "common/object_pool.h" #include "common/types.h" #include "shader_recompiler/frontend/instruction.h" @@ -55,9 +57,26 @@ public: private: void EmitLabels(); + void EmitDivergenceLabels(); void EmitBlocks(); void LinkBlocks(); + void AddLabel(Label address) { + const auto it = std::ranges::find(labels, address); + if (it == labels.end()) { + labels.push_back(address); + } + }; + + size_t GetIndex(Label label) { + if (label == 0) { + return 0ULL; + } + const auto it_index = std::ranges::lower_bound(index_to_pc, label); + ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back()); + return std::distance(index_to_pc.begin(), it_index); + }; + public: Common::ObjectPool& block_pool; std::span inst_list; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 97fc5b99..8c727bf8 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -436,7 +436,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, .dword_offset = sharp.dword_offset, .length = BufferLength(buffer), .used_types = BufferDataType(inst, buffer.GetNumberFmt()), - .is_storage = is_store || buffer.GetSize() > MaxUboSize, + .is_storage = true || is_store || buffer.GetSize() > MaxUboSize, .is_written = is_store, }); } diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 2246807a..02d6b2ce 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -13,7 +13,7 @@ namespace VideoCore { -static constexpr size_t StagingBufferSize = 256_MB; +static constexpr size_t StagingBufferSize = 512_MB; static constexpr size_t UboStreamBufferSize = 64_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 62b50eeb..f44880d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -114,7 +114,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, } } const u32 size = vsharp.GetSize(); - if (buffer.is_written) { + if (buffer.is_written && compute_key != 0xe991ee280187cbc) { texture_cache.InvalidateMemory(address, size, true); } const u32 alignment = diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 38d1f51b..200d1a52 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -322,6 +322,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline() { Shader::Info info = MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs); info.pgm_base = cs_pgm.Address(); + info.pgm_hash = compute_key; auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);