control_flow_graph: Initial divergence handling (#434)
* control_flow_graph: Initial divergence handling * cfg: Handle additional case * spirv: Handle tgid enable bits * clang format * spirv: Use proper format * translator: Add more instructions
This commit is contained in:
parent
ff33b00c3a
commit
1d1c88ad31
|
@ -10,7 +10,7 @@
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <common/assert.h>
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "core/libraries/error_codes.h"
|
#include "core/libraries/error_codes.h"
|
||||||
#include "core/libraries/libs.h"
|
#include "core/libraries/libs.h"
|
||||||
|
|
|
@ -323,7 +323,7 @@ static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offs
|
||||||
|
|
||||||
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
|
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
|
||||||
auto& buffer = ctx.buffers[handle];
|
auto& buffer = ctx.buffers[handle];
|
||||||
const auto format = buffer.buffer.GetDataFmt();
|
const auto format = buffer.dfmt;
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case AmdGpu::DataFormat::FormatInvalid:
|
case AmdGpu::DataFormat::FormatInvalid:
|
||||||
return ctx.f32_zero_value;
|
return ctx.f32_zero_value;
|
||||||
|
@ -348,7 +348,7 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
|
||||||
|
|
||||||
// uint index = address / 4;
|
// uint index = address / 4;
|
||||||
Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||||
const u32 stride = buffer.buffer.GetStride();
|
const u32 stride = buffer.stride;
|
||||||
if (stride > 4) {
|
if (stride > 4) {
|
||||||
const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32);
|
const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32);
|
||||||
if (index_offset > 0) {
|
if (index_offset > 0) {
|
||||||
|
@ -360,7 +360,7 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
|
||||||
|
|
||||||
const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32;
|
const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32;
|
||||||
const u32 bit_width = AmdGpu::ComponentBits(format, comp);
|
const u32 bit_width = AmdGpu::ComponentBits(format, comp);
|
||||||
const auto num_format = buffer.buffer.GetNumberFmt();
|
const auto num_format = buffer.nfmt;
|
||||||
if (num_format == AmdGpu::NumberFormat::Float) {
|
if (num_format == AmdGpu::NumberFormat::Float) {
|
||||||
if (bit_width == 32) {
|
if (bit_width == 32) {
|
||||||
return ctx.OpLoad(ctx.F32[1], ptr);
|
return ctx.OpLoad(ctx.F32[1], ptr);
|
||||||
|
@ -486,8 +486,8 @@ static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat fo
|
||||||
template <u32 N>
|
template <u32 N>
|
||||||
static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||||
auto& buffer = ctx.buffers[handle];
|
auto& buffer = ctx.buffers[handle];
|
||||||
const auto format = buffer.buffer.GetDataFmt();
|
const auto format = buffer.dfmt;
|
||||||
const auto num_format = buffer.buffer.GetNumberFmt();
|
const auto num_format = buffer.nfmt;
|
||||||
|
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case AmdGpu::DataFormat::FormatInvalid:
|
case AmdGpu::DataFormat::FormatInvalid:
|
||||||
|
|
|
@ -363,7 +363,9 @@ void EmitContext::DefineBuffers() {
|
||||||
.binding = binding++,
|
.binding = binding++,
|
||||||
.data_types = data_types,
|
.data_types = data_types,
|
||||||
.pointer_type = pointer_type,
|
.pointer_type = pointer_type,
|
||||||
.buffer = buffer.GetVsharp(info),
|
.dfmt = buffer.dfmt,
|
||||||
|
.nfmt = buffer.nfmt,
|
||||||
|
.stride = buffer.GetVsharp(info).GetStride(),
|
||||||
});
|
});
|
||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
i++;
|
i++;
|
||||||
|
|
|
@ -207,7 +207,9 @@ public:
|
||||||
u32 binding;
|
u32 binding;
|
||||||
const VectorIds* data_types;
|
const VectorIds* data_types;
|
||||||
Id pointer_type;
|
Id pointer_type;
|
||||||
AmdGpu::Buffer buffer;
|
AmdGpu::DataFormat dfmt;
|
||||||
|
AmdGpu::NumberFormat nfmt;
|
||||||
|
u32 stride;
|
||||||
};
|
};
|
||||||
|
|
||||||
u32& binding;
|
u32& binding;
|
||||||
|
|
|
@ -35,15 +35,22 @@ static IR::Condition MakeCondition(Opcode opcode) {
|
||||||
return IR::Condition::Execz;
|
return IR::Condition::Execz;
|
||||||
case Opcode::S_CBRANCH_EXECNZ:
|
case Opcode::S_CBRANCH_EXECNZ:
|
||||||
return IR::Condition::Execnz;
|
return IR::Condition::Execnz;
|
||||||
|
case Opcode::S_AND_SAVEEXEC_B64:
|
||||||
|
case Opcode::S_ANDN2_B64:
|
||||||
|
return IR::Condition::Execnz;
|
||||||
default:
|
default:
|
||||||
return IR::Condition::True;
|
return IR::Condition::True;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static constexpr size_t LabelReserveSize = 32;
|
||||||
|
|
||||||
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
|
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
|
||||||
: block_pool{block_pool_}, inst_list{inst_list_} {
|
: block_pool{block_pool_}, inst_list{inst_list_} {
|
||||||
index_to_pc.resize(inst_list.size() + 1);
|
index_to_pc.resize(inst_list.size() + 1);
|
||||||
|
labels.reserve(LabelReserveSize);
|
||||||
EmitLabels();
|
EmitLabels();
|
||||||
|
EmitDivergenceLabels();
|
||||||
EmitBlocks();
|
EmitBlocks();
|
||||||
LinkBlocks();
|
LinkBlocks();
|
||||||
}
|
}
|
||||||
|
@ -51,14 +58,7 @@ CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_l
|
||||||
void CFG::EmitLabels() {
|
void CFG::EmitLabels() {
|
||||||
// Always set a label at entry point.
|
// Always set a label at entry point.
|
||||||
u32 pc = 0;
|
u32 pc = 0;
|
||||||
labels.push_back(pc);
|
AddLabel(pc);
|
||||||
|
|
||||||
const auto add_label = [this](u32 address) {
|
|
||||||
const auto it = std::ranges::find(labels, address);
|
|
||||||
if (it == labels.end()) {
|
|
||||||
labels.push_back(address);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Iterate instruction list and add labels to branch targets.
|
// Iterate instruction list and add labels to branch targets.
|
||||||
for (u32 i = 0; i < inst_list.size(); i++) {
|
for (u32 i = 0; i < inst_list.size(); i++) {
|
||||||
|
@ -66,15 +66,15 @@ void CFG::EmitLabels() {
|
||||||
const GcnInst inst = inst_list[i];
|
const GcnInst inst = inst_list[i];
|
||||||
if (inst.IsUnconditionalBranch()) {
|
if (inst.IsUnconditionalBranch()) {
|
||||||
const u32 target = inst.BranchTarget(pc);
|
const u32 target = inst.BranchTarget(pc);
|
||||||
add_label(target);
|
AddLabel(target);
|
||||||
} else if (inst.IsConditionalBranch()) {
|
} else if (inst.IsConditionalBranch()) {
|
||||||
const u32 true_label = inst.BranchTarget(pc);
|
const u32 true_label = inst.BranchTarget(pc);
|
||||||
const u32 false_label = pc + inst.length;
|
const u32 false_label = pc + inst.length;
|
||||||
add_label(true_label);
|
AddLabel(true_label);
|
||||||
add_label(false_label);
|
AddLabel(false_label);
|
||||||
} else if (inst.opcode == Opcode::S_ENDPGM) {
|
} else if (inst.opcode == Opcode::S_ENDPGM) {
|
||||||
const u32 next_label = pc + inst.length;
|
const u32 next_label = pc + inst.length;
|
||||||
add_label(next_label);
|
AddLabel(next_label);
|
||||||
}
|
}
|
||||||
pc += inst.length;
|
pc += inst.length;
|
||||||
}
|
}
|
||||||
|
@ -84,16 +84,70 @@ void CFG::EmitLabels() {
|
||||||
std::ranges::sort(labels);
|
std::ranges::sort(labels);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CFG::EmitBlocks() {
|
void CFG::EmitDivergenceLabels() {
|
||||||
const auto get_index = [this](Label label) -> size_t {
|
const auto is_open_scope = [](const GcnInst& inst) {
|
||||||
if (label == 0) {
|
// An open scope instruction is an instruction that modifies EXEC
|
||||||
return 0ULL;
|
// but also saves the previous value to restore later. This indicates
|
||||||
}
|
// we are entering a scope.
|
||||||
const auto it_index = std::ranges::lower_bound(index_to_pc, label);
|
return inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
|
||||||
ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back());
|
// While this instruction does not save EXEC it is often used paired
|
||||||
return std::distance(index_to_pc.begin(), it_index);
|
// with SAVEEXEC to mask the threads that didn't pass the condition
|
||||||
|
// of initial branch.
|
||||||
|
inst.opcode == Opcode::S_ANDN2_B64;
|
||||||
|
};
|
||||||
|
const auto is_close_scope = [](const GcnInst& inst) {
|
||||||
|
// Closing an EXEC scope can be either a branch instruction
|
||||||
|
// (typical case when S_AND_SAVEEXEC_B64 is right before a branch)
|
||||||
|
// or by a move instruction to EXEC that restores the backup.
|
||||||
|
return (inst.opcode == Opcode::S_MOV_B64 && inst.dst[0].field == OperandField::ExecLo) ||
|
||||||
|
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
|
||||||
|
// Those instructions need to be wrapped in the condition as well so allow branch
|
||||||
|
// as end scope instruction.
|
||||||
|
inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Since we will be adding new labels, avoid iterating those as well.
|
||||||
|
const size_t end_size = labels.size();
|
||||||
|
for (u32 l = 0; l < end_size; l++) {
|
||||||
|
const Label start = labels[l];
|
||||||
|
// Stop if we reached end of existing labels.
|
||||||
|
if (l == end_size - 1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
const Label end = labels[l + 1];
|
||||||
|
const size_t end_index = GetIndex(end);
|
||||||
|
|
||||||
|
s32 curr_begin = -1;
|
||||||
|
for (size_t index = GetIndex(start); index < end_index; index++) {
|
||||||
|
const auto& inst = inst_list[index];
|
||||||
|
if (is_close_scope(inst) && curr_begin != -1) {
|
||||||
|
// If there are no instructions inside scope don't do anything.
|
||||||
|
if (index - curr_begin == 1) {
|
||||||
|
curr_begin = -1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Add a label to the instruction right after the open scope call.
|
||||||
|
// It is the start of a new basic block.
|
||||||
|
const auto& save_inst = inst_list[curr_begin];
|
||||||
|
const Label label = index_to_pc[curr_begin] + save_inst.length;
|
||||||
|
AddLabel(label);
|
||||||
|
// Add a label to the close scope instruction as well.
|
||||||
|
AddLabel(index_to_pc[index]);
|
||||||
|
// Reset scope begin.
|
||||||
|
curr_begin = -1;
|
||||||
|
}
|
||||||
|
// Mark a potential start of an exec scope.
|
||||||
|
if (is_open_scope(inst)) {
|
||||||
|
curr_begin = index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort labels to make sure block insertion is correct.
|
||||||
|
std::ranges::sort(labels);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CFG::EmitBlocks() {
|
||||||
for (auto it = labels.begin(); it != labels.end(); it++) {
|
for (auto it = labels.begin(); it != labels.end(); it++) {
|
||||||
const Label start = *it;
|
const Label start = *it;
|
||||||
const auto next_it = std::next(it);
|
const auto next_it = std::next(it);
|
||||||
|
@ -102,8 +156,10 @@ void CFG::EmitBlocks() {
|
||||||
// Last label is special.
|
// Last label is special.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// The end label is the start instruction of next block.
|
||||||
|
// The end instruction of this block is the previous one.
|
||||||
const Label end = *next_it;
|
const Label end = *next_it;
|
||||||
const size_t end_index = get_index(end) - 1;
|
const size_t end_index = GetIndex(end) - 1;
|
||||||
const auto& end_inst = inst_list[end_index];
|
const auto& end_inst = inst_list[end_index];
|
||||||
|
|
||||||
// Insert block between the labels using the last instruction
|
// Insert block between the labels using the last instruction
|
||||||
|
@ -111,7 +167,7 @@ void CFG::EmitBlocks() {
|
||||||
Block* block = block_pool.Create();
|
Block* block = block_pool.Create();
|
||||||
block->begin = start;
|
block->begin = start;
|
||||||
block->end = end;
|
block->end = end;
|
||||||
block->begin_index = get_index(start);
|
block->begin_index = GetIndex(start);
|
||||||
block->end_index = end_index;
|
block->end_index = end_index;
|
||||||
block->end_inst = end_inst;
|
block->end_inst = end_inst;
|
||||||
block->cond = MakeCondition(end_inst.opcode);
|
block->cond = MakeCondition(end_inst.opcode);
|
||||||
|
@ -126,8 +182,26 @@ void CFG::LinkBlocks() {
|
||||||
return &*it;
|
return &*it;
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto& block : blocks) {
|
for (auto it = blocks.begin(); it != blocks.end(); it++) {
|
||||||
|
auto& block = *it;
|
||||||
const auto end_inst{block.end_inst};
|
const auto end_inst{block.end_inst};
|
||||||
|
// Handle divergence block inserted here.
|
||||||
|
if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
|
||||||
|
end_inst.opcode == Opcode::S_ANDN2_B64) {
|
||||||
|
// Blocks are stored ordered by address in the set
|
||||||
|
auto next_it = std::next(it);
|
||||||
|
auto* target_block = &(*next_it);
|
||||||
|
++target_block->num_predecessors;
|
||||||
|
block.branch_true = target_block;
|
||||||
|
|
||||||
|
auto merge_it = std::next(next_it);
|
||||||
|
auto* merge_block = &(*merge_it);
|
||||||
|
++merge_block->num_predecessors;
|
||||||
|
block.branch_false = merge_block;
|
||||||
|
block.end_class = EndClass::Branch;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// If the block doesn't end with a branch we simply
|
// If the block doesn't end with a branch we simply
|
||||||
// need to link with the next block.
|
// need to link with the next block.
|
||||||
if (!end_inst.IsTerminateInstruction()) {
|
if (!end_inst.IsTerminateInstruction()) {
|
||||||
|
|
|
@ -3,11 +3,13 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <boost/intrusive/set.hpp>
|
#include <boost/intrusive/set.hpp>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
#include "common/object_pool.h"
|
#include "common/object_pool.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/frontend/instruction.h"
|
#include "shader_recompiler/frontend/instruction.h"
|
||||||
|
@ -55,9 +57,26 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void EmitLabels();
|
void EmitLabels();
|
||||||
|
void EmitDivergenceLabels();
|
||||||
void EmitBlocks();
|
void EmitBlocks();
|
||||||
void LinkBlocks();
|
void LinkBlocks();
|
||||||
|
|
||||||
|
void AddLabel(Label address) {
|
||||||
|
const auto it = std::ranges::find(labels, address);
|
||||||
|
if (it == labels.end()) {
|
||||||
|
labels.push_back(address);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t GetIndex(Label label) {
|
||||||
|
if (label == 0) {
|
||||||
|
return 0ULL;
|
||||||
|
}
|
||||||
|
const auto it_index = std::ranges::lower_bound(index_to_pc, label);
|
||||||
|
ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back());
|
||||||
|
return std::distance(index_to_pc.begin(), it_index);
|
||||||
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Common::ObjectPool<Block>& block_pool;
|
Common::ObjectPool<Block>& block_pool;
|
||||||
std::span<const GcnInst> inst_list;
|
std::span<const GcnInst> inst_list;
|
||||||
|
|
|
@ -29,6 +29,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
||||||
return S_CMP(ConditionOp::LG, true, inst);
|
return S_CMP(ConditionOp::LG, true, inst);
|
||||||
case Opcode::S_CMP_GT_I32:
|
case Opcode::S_CMP_GT_I32:
|
||||||
return S_CMP(ConditionOp::GT, true, inst);
|
return S_CMP(ConditionOp::GT, true, inst);
|
||||||
|
case Opcode::S_CMP_LE_I32:
|
||||||
|
return S_CMP(ConditionOp::LE, true, inst);
|
||||||
case Opcode::S_CMP_GE_I32:
|
case Opcode::S_CMP_GE_I32:
|
||||||
return S_CMP(ConditionOp::GE, true, inst);
|
return S_CMP(ConditionOp::GE, true, inst);
|
||||||
case Opcode::S_CMP_EQ_I32:
|
case Opcode::S_CMP_EQ_I32:
|
||||||
|
|
|
@ -64,9 +64,15 @@ void Translator::EmitPrologue() {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||||
|
|
||||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
|
if (info.tgid_enable[0]) {
|
||||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
|
||||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
}
|
||||||
|
if (info.tgid_enable[1]) {
|
||||||
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
|
||||||
|
}
|
||||||
|
if (info.tgid_enable[2]) {
|
||||||
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Unknown shader stage");
|
throw NotImplementedException("Unknown shader stage");
|
||||||
|
|
|
@ -91,6 +91,11 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
||||||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||||
return BUFFER_STORE_FORMAT(4, false, true, inst);
|
return BUFFER_STORE_FORMAT(4, false, true, inst);
|
||||||
|
|
||||||
|
case Opcode::TBUFFER_STORE_FORMAT_X:
|
||||||
|
return BUFFER_STORE_FORMAT(1, true, true, inst);
|
||||||
|
case Opcode::TBUFFER_STORE_FORMAT_XYZ:
|
||||||
|
return BUFFER_STORE_FORMAT(3, true, true, inst);
|
||||||
|
|
||||||
case Opcode::BUFFER_STORE_DWORD:
|
case Opcode::BUFFER_STORE_DWORD:
|
||||||
return BUFFER_STORE_FORMAT(1, false, false, inst);
|
return BUFFER_STORE_FORMAT(1, false, false, inst);
|
||||||
case Opcode::BUFFER_STORE_DWORDX2:
|
case Opcode::BUFFER_STORE_DWORDX2:
|
||||||
|
|
|
@ -180,6 +180,7 @@ struct Info {
|
||||||
SamplerResourceList samplers;
|
SamplerResourceList samplers;
|
||||||
|
|
||||||
std::array<u32, 3> workgroup_size{};
|
std::array<u32, 3> workgroup_size{};
|
||||||
|
std::array<bool, 3> tgid_enable;
|
||||||
|
|
||||||
u32 num_user_data;
|
u32 num_user_data;
|
||||||
u32 num_input_vgprs;
|
u32 num_input_vgprs;
|
||||||
|
|
|
@ -130,6 +130,7 @@ struct Liverpool {
|
||||||
BitField<0, 6, u64> num_vgprs;
|
BitField<0, 6, u64> num_vgprs;
|
||||||
BitField<6, 4, u64> num_sgprs;
|
BitField<6, 4, u64> num_sgprs;
|
||||||
BitField<33, 5, u64> num_user_regs;
|
BitField<33, 5, u64> num_user_regs;
|
||||||
|
BitField<39, 3, u64> tgid_enable;
|
||||||
BitField<47, 9, u64> lds_dwords;
|
BitField<47, 9, u64> lds_dwords;
|
||||||
} settings;
|
} settings;
|
||||||
INSERT_PADDING_WORDS(1);
|
INSERT_PADDING_WORDS(1);
|
||||||
|
@ -148,6 +149,10 @@ struct Liverpool {
|
||||||
return settings.lds_dwords.Value() * 128 * 4;
|
return settings.lds_dwords.Value() * 128 * 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsTgidEnabled(u32 i) const noexcept {
|
||||||
|
return (settings.tgid_enable.Value() >> i) & 1;
|
||||||
|
}
|
||||||
|
|
||||||
std::span<const u32> Code() const {
|
std::span<const u32> Code() const {
|
||||||
const u32* code = Address<u32*>();
|
const u32* code = Address<u32*>();
|
||||||
BinaryInfo bininfo;
|
BinaryInfo bininfo;
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
static constexpr size_t StagingBufferSize = 256_MB;
|
static constexpr size_t StagingBufferSize = 512_MB;
|
||||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||||
|
|
||||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <boost/icl/interval_map.hpp>
|
#include <boost/icl/interval_map.hpp>
|
||||||
|
|
|
@ -93,6 +93,8 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
|
||||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||||
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||||
cs_pgm.num_thread_z.full};
|
cs_pgm.num_thread_z.full};
|
||||||
|
info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||||
|
cs_pgm.IsTgidEnabled(2)};
|
||||||
info.shared_memory_size = cs_pgm.SharedMemSize();
|
info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -324,6 +326,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
||||||
Shader::Info info =
|
Shader::Info info =
|
||||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||||
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
||||||
|
info.pgm_hash = compute_key;
|
||||||
auto program =
|
auto program =
|
||||||
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
|
Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue