Merge pull request #292 from shadps4-emu/games/00144
Missing graphics features for flOw & Flower
This commit is contained in:
commit
b4df90d8e4
|
@ -495,8 +495,33 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
||||||
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
|
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
|
||||||
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
|
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
|
||||||
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
|
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
|
||||||
|
const std::span<const u32> acb_span{acb_ptr, acb_size >> 2u};
|
||||||
|
|
||||||
liverpool->SubmitAsc(vqid, {acb_ptr, acb_size >> 2u});
|
if (Config::dumpPM4()) {
|
||||||
|
static auto last_frame_num = -1LL;
|
||||||
|
static u32 seq_num{};
|
||||||
|
if (last_frame_num == frames_submitted) {
|
||||||
|
++seq_num;
|
||||||
|
} else {
|
||||||
|
last_frame_num = frames_submitted;
|
||||||
|
seq_num = 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Up to this point, all ACB submissions have been stored in a secondary command buffer.
|
||||||
|
// Dumping them using the current ring pointer would result in files containing only the
|
||||||
|
// `IndirectBuffer` command. To access the actual command stream, we need to unwrap the IB.
|
||||||
|
auto acb = acb_span;
|
||||||
|
const auto* indirect_buffer =
|
||||||
|
reinterpret_cast<const PM4CmdIndirectBuffer*>(acb_span.data());
|
||||||
|
if (indirect_buffer->header.opcode == PM4ItOpcode::IndirectBuffer) {
|
||||||
|
acb = {indirect_buffer->Address<const u32>(), indirect_buffer->ib_size};
|
||||||
|
}
|
||||||
|
|
||||||
|
// File name format is: <queue>_<queue num>_<submit_num>
|
||||||
|
DumpCommandList(acb, std::format("acb_{}_{}", gnm_vqid, seq_num));
|
||||||
|
}
|
||||||
|
|
||||||
|
liverpool->SubmitAsc(vqid, acb_span);
|
||||||
|
|
||||||
*asc_queue.read_addr += acb_size;
|
*asc_queue.read_addr += acb_size;
|
||||||
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
|
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
|
||||||
|
|
|
@ -185,6 +185,10 @@ void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev) {
|
||||||
return ev->udata;
|
return ev->udata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 PS4_SYSV_ABI sceKernelGetEventId(const SceKernelEvent* ev) {
|
||||||
|
return ev->ident;
|
||||||
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceKernelTriggerUserEvent(SceKernelEqueue eq, int id, void* udata) {
|
int PS4_SYSV_ABI sceKernelTriggerUserEvent(SceKernelEqueue eq, int id, void* udata) {
|
||||||
if (eq == nullptr) {
|
if (eq == nullptr) {
|
||||||
return ORBIS_KERNEL_ERROR_EBADF;
|
return ORBIS_KERNEL_ERROR_EBADF;
|
||||||
|
|
|
@ -15,6 +15,7 @@ int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq);
|
||||||
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
||||||
SceKernelUseconds* timo);
|
SceKernelUseconds* timo);
|
||||||
void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev);
|
void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev);
|
||||||
|
u64 PS4_SYSV_ABI sceKernelGetEventId(const SceKernelEvent* ev);
|
||||||
int PS4_SYSV_ABI sceKernelTriggerUserEvent(SceKernelEqueue eq, int id, void* udata);
|
int PS4_SYSV_ABI sceKernelTriggerUserEvent(SceKernelEqueue eq, int id, void* udata);
|
||||||
int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id);
|
int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id);
|
||||||
int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id);
|
int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id);
|
||||||
|
|
|
@ -401,6 +401,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
||||||
LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent);
|
LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent);
|
||||||
LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent);
|
LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent);
|
||||||
LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent);
|
LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent);
|
||||||
|
LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId);
|
||||||
|
|
||||||
// misc
|
// misc
|
||||||
LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode);
|
LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode);
|
||||||
|
|
|
@ -131,6 +131,13 @@ Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
|
||||||
return ctx.OpBitcast(ctx.U32[1], EmitReadConstBuffer(ctx, handle, index));
|
return ctx.OpBitcast(ctx.U32[1], EmitReadConstBuffer(ctx, handle, index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
||||||
|
return ctx.OpLoad(
|
||||||
|
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
||||||
|
ctx.instance_step_rates,
|
||||||
|
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
|
@ -149,11 +156,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
return ctx.OpLoad(param.component_type, param.id);
|
return ctx.OpLoad(param.component_type, param.id);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
|
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
|
||||||
const auto step_rate = ctx.OpLoad(
|
|
||||||
ctx.U32[1],
|
|
||||||
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
|
||||||
ctx.instance_step_rates, rate_idx));
|
|
||||||
const auto offset = ctx.OpIAdd(
|
const auto offset = ctx.OpIAdd(
|
||||||
ctx.U32[1],
|
ctx.U32[1],
|
||||||
ctx.OpIMul(
|
ctx.OpIMul(
|
||||||
|
@ -182,6 +185,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::VertexId:
|
case IR::Attribute::VertexId:
|
||||||
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
|
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
|
||||||
|
case IR::Attribute::InstanceId:
|
||||||
|
return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
|
||||||
|
case IR::Attribute::InstanceId0:
|
||||||
|
return EmitReadStepRate(ctx, 0);
|
||||||
|
case IR::Attribute::InstanceId1:
|
||||||
|
return EmitReadStepRate(ctx, 1);
|
||||||
case IR::Attribute::WorkgroupId:
|
case IR::Attribute::WorkgroupId:
|
||||||
return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
|
return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
|
||||||
case IR::Attribute::LocalInvocationId:
|
case IR::Attribute::LocalInvocationId:
|
||||||
|
|
|
@ -45,6 +45,7 @@ void EmitSetVccHi(EmitContext& ctx);
|
||||||
void EmitPrologue(EmitContext& ctx);
|
void EmitPrologue(EmitContext& ctx);
|
||||||
void EmitEpilogue(EmitContext& ctx);
|
void EmitEpilogue(EmitContext& ctx);
|
||||||
void EmitDiscard(EmitContext& ctx);
|
void EmitDiscard(EmitContext& ctx);
|
||||||
|
void EmitDiscardCond(EmitContext& ctx, Id condition);
|
||||||
void EmitBarrier(EmitContext& ctx);
|
void EmitBarrier(EmitContext& ctx);
|
||||||
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
|
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
|
||||||
void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
void EmitDeviceMemoryBarrier(EmitContext& ctx);
|
||||||
|
|
|
@ -14,6 +14,17 @@ void EmitDiscard(EmitContext& ctx) {
|
||||||
ctx.OpDemoteToHelperInvocationEXT();
|
ctx.OpDemoteToHelperInvocationEXT();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitDiscardCond(EmitContext& ctx, Id condition) {
|
||||||
|
const Id kill_label{ctx.OpLabel()};
|
||||||
|
const Id merge_label{ctx.OpLabel()};
|
||||||
|
ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
|
||||||
|
ctx.OpBranchConditional(condition, kill_label, merge_label);
|
||||||
|
ctx.AddLabel(kill_label);
|
||||||
|
ctx.OpDemoteToHelperInvocationEXT();
|
||||||
|
ctx.OpBranch(merge_label);
|
||||||
|
ctx.AddLabel(merge_label);
|
||||||
|
}
|
||||||
|
|
||||||
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
|
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
|
||||||
throw NotImplementedException("Geometry streams");
|
throw NotImplementedException("Geometry streams");
|
||||||
}
|
}
|
||||||
|
|
|
@ -121,7 +121,7 @@ void CFG::EmitBlocks() {
|
||||||
|
|
||||||
void CFG::LinkBlocks() {
|
void CFG::LinkBlocks() {
|
||||||
const auto get_block = [this](u32 address) {
|
const auto get_block = [this](u32 address) {
|
||||||
const auto it = blocks.find(address, Compare{});
|
auto it = blocks.find(address, Compare{});
|
||||||
ASSERT_MSG(it != blocks.end() && it->begin == address);
|
ASSERT_MSG(it != blocks.end() && it->begin == address);
|
||||||
return &*it;
|
return &*it;
|
||||||
};
|
};
|
||||||
|
@ -131,7 +131,10 @@ void CFG::LinkBlocks() {
|
||||||
// If the block doesn't end with a branch we simply
|
// If the block doesn't end with a branch we simply
|
||||||
// need to link with the next block.
|
// need to link with the next block.
|
||||||
if (!end_inst.IsTerminateInstruction()) {
|
if (!end_inst.IsTerminateInstruction()) {
|
||||||
block.branch_true = get_block(block.end);
|
auto* next_block = get_block(block.end);
|
||||||
|
++next_block->num_predecessors;
|
||||||
|
|
||||||
|
block.branch_true = next_block;
|
||||||
block.end_class = EndClass::Branch;
|
block.end_class = EndClass::Branch;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -141,11 +144,20 @@ void CFG::LinkBlocks() {
|
||||||
const u32 branch_pc = block.end - end_inst.length;
|
const u32 branch_pc = block.end - end_inst.length;
|
||||||
const u32 target_pc = end_inst.BranchTarget(branch_pc);
|
const u32 target_pc = end_inst.BranchTarget(branch_pc);
|
||||||
if (end_inst.IsUnconditionalBranch()) {
|
if (end_inst.IsUnconditionalBranch()) {
|
||||||
block.branch_true = get_block(target_pc);
|
auto* target_block = get_block(target_pc);
|
||||||
|
++target_block->num_predecessors;
|
||||||
|
|
||||||
|
block.branch_true = target_block;
|
||||||
block.end_class = EndClass::Branch;
|
block.end_class = EndClass::Branch;
|
||||||
} else if (end_inst.IsConditionalBranch()) {
|
} else if (end_inst.IsConditionalBranch()) {
|
||||||
block.branch_true = get_block(target_pc);
|
auto* target_block = get_block(target_pc);
|
||||||
block.branch_false = get_block(block.end);
|
++target_block->num_predecessors;
|
||||||
|
|
||||||
|
auto* end_block = get_block(block.end);
|
||||||
|
++end_block->num_predecessors;
|
||||||
|
|
||||||
|
block.branch_true = target_block;
|
||||||
|
block.branch_false = end_block;
|
||||||
block.end_class = EndClass::Branch;
|
block.end_class = EndClass::Branch;
|
||||||
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
|
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
|
||||||
const auto& prev_inst = inst_list[block.end_index - 1];
|
const auto& prev_inst = inst_list[block.end_index - 1];
|
||||||
|
|
|
@ -36,6 +36,7 @@ struct Block : Hook {
|
||||||
u32 end;
|
u32 end;
|
||||||
u32 begin_index;
|
u32 begin_index;
|
||||||
u32 end_index;
|
u32 end_index;
|
||||||
|
u32 num_predecessors{};
|
||||||
IR::Condition cond{};
|
IR::Condition cond{};
|
||||||
GcnInst end_inst{};
|
GcnInst end_inst{};
|
||||||
EndClass end_class{};
|
EndClass end_class{};
|
||||||
|
|
|
@ -631,6 +631,7 @@ private:
|
||||||
case StatementType::Code: {
|
case StatementType::Code: {
|
||||||
ensure_block();
|
ensure_block();
|
||||||
if (!stmt.block->is_dummy) {
|
if (!stmt.block->is_dummy) {
|
||||||
|
current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1;
|
||||||
const u32 start = stmt.block->begin_index;
|
const u32 start = stmt.block->begin_index;
|
||||||
const u32 size = stmt.block->end_index - start + 1;
|
const u32 size = stmt.block->end_index - start + 1;
|
||||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
||||||
|
|
|
@ -1,11 +1,17 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/logging/log.h"
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
void Translator::EXP(const GcnInst& inst) {
|
void Translator::EXP(const GcnInst& inst) {
|
||||||
|
if (ir.block->has_multiple_predecessors) {
|
||||||
|
LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation");
|
||||||
|
ir.Discard(ir.LogicalNot(ir.GetExec()));
|
||||||
|
}
|
||||||
|
|
||||||
const auto& exp = inst.control.exp;
|
const auto& exp = inst.control.exp;
|
||||||
const IR::Attribute attrib{exp.target};
|
const IR::Attribute attrib{exp.target};
|
||||||
const std::array vsrc = {
|
const std::array vsrc = {
|
||||||
|
|
|
@ -35,10 +35,20 @@ void Translator::EmitPrologue() {
|
||||||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||||
switch (info.stage) {
|
switch (info.stage) {
|
||||||
case Stage::Vertex:
|
case Stage::Vertex:
|
||||||
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23
|
// v0: vertex ID, always present
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||||
|
// v1: instance ID, step rate 0
|
||||||
|
if (info.num_input_vgprs > 0) {
|
||||||
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
|
||||||
|
}
|
||||||
|
// v2: instance ID, step rate 1
|
||||||
|
if (info.num_input_vgprs > 1) {
|
||||||
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
|
||||||
|
}
|
||||||
|
// v3: instance ID, plain
|
||||||
|
if (info.num_input_vgprs > 2) {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
}
|
||||||
break;
|
break;
|
||||||
case Stage::Fragment:
|
case Stage::Fragment:
|
||||||
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L258
|
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L258
|
||||||
|
|
|
@ -72,6 +72,8 @@ enum class Attribute : u64 {
|
||||||
LocalInvocationId = 75,
|
LocalInvocationId = 75,
|
||||||
LocalInvocationIndex = 76,
|
LocalInvocationIndex = 76,
|
||||||
FragCoord = 77,
|
FragCoord = 77,
|
||||||
|
InstanceId0 = 78, // step rate 0
|
||||||
|
InstanceId1 = 79, // step rate 1
|
||||||
Max,
|
Max,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -149,6 +149,8 @@ public:
|
||||||
std::array<Value, NumScalarRegs> ssa_sreg_values;
|
std::array<Value, NumScalarRegs> ssa_sreg_values;
|
||||||
std::array<Value, NumVectorRegs> ssa_vreg_values;
|
std::array<Value, NumVectorRegs> ssa_vreg_values;
|
||||||
|
|
||||||
|
bool has_multiple_predecessors{false};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Memory pool for instruction list
|
/// Memory pool for instruction list
|
||||||
ObjectPool<Inst>* inst_pool;
|
ObjectPool<Inst>* inst_pool;
|
||||||
|
|
|
@ -115,6 +115,10 @@ void IREmitter::Discard() {
|
||||||
Inst(Opcode::Discard);
|
Inst(Opcode::Discard);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IREmitter::Discard(const U1& cond) {
|
||||||
|
Inst(Opcode::DiscardCond, cond);
|
||||||
|
}
|
||||||
|
|
||||||
void IREmitter::Barrier() {
|
void IREmitter::Barrier() {
|
||||||
Inst(Opcode::Barrier);
|
Inst(Opcode::Barrier);
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ public:
|
||||||
void Prologue();
|
void Prologue();
|
||||||
void Epilogue();
|
void Epilogue();
|
||||||
void Discard();
|
void Discard();
|
||||||
|
void Discard(const U1& cond);
|
||||||
|
|
||||||
void Barrier();
|
void Barrier();
|
||||||
void WorkgroupMemoryBarrier();
|
void WorkgroupMemoryBarrier();
|
||||||
|
|
|
@ -49,6 +49,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||||
case Opcode::Prologue:
|
case Opcode::Prologue:
|
||||||
case Opcode::Epilogue:
|
case Opcode::Epilogue:
|
||||||
case Opcode::Discard:
|
case Opcode::Discard:
|
||||||
|
case Opcode::DiscardCond:
|
||||||
case Opcode::SetAttribute:
|
case Opcode::SetAttribute:
|
||||||
case Opcode::StoreBufferF32:
|
case Opcode::StoreBufferF32:
|
||||||
case Opcode::StoreBufferF32x2:
|
case Opcode::StoreBufferF32x2:
|
||||||
|
|
|
@ -13,6 +13,7 @@ OPCODE(PhiMove, Void, Opaq
|
||||||
OPCODE(Prologue, Void, )
|
OPCODE(Prologue, Void, )
|
||||||
OPCODE(Epilogue, Void, )
|
OPCODE(Epilogue, Void, )
|
||||||
OPCODE(Discard, Void, )
|
OPCODE(Discard, Void, )
|
||||||
|
OPCODE(DiscardCond, Void, U1, )
|
||||||
|
|
||||||
// Constant memory operations
|
// Constant memory operations
|
||||||
OPCODE(ReadConst, U32, U32x2, U32, )
|
OPCODE(ReadConst, U32, U32x2, U32, )
|
||||||
|
|
|
@ -37,6 +37,7 @@ void Visit(Info& info, IR::Inst& inst) {
|
||||||
info.uses_group_quad = true;
|
info.uses_group_quad = true;
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::Discard:
|
case IR::Opcode::Discard:
|
||||||
|
case IR::Opcode::DiscardCond:
|
||||||
info.has_discard = true;
|
info.has_discard = true;
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::ImageGather:
|
case IR::Opcode::ImageGather:
|
||||||
|
|
|
@ -163,6 +163,7 @@ struct Info {
|
||||||
std::array<u32, 3> workgroup_size{};
|
std::array<u32, 3> workgroup_size{};
|
||||||
|
|
||||||
u32 num_user_data;
|
u32 num_user_data;
|
||||||
|
u32 num_input_vgprs;
|
||||||
std::span<const u32> user_data;
|
std::span<const u32> user_data;
|
||||||
Stage stage;
|
Stage stage;
|
||||||
|
|
||||||
|
|
|
@ -199,19 +199,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
|
|
||||||
switch (reg_addr) {
|
switch (reg_addr) {
|
||||||
case ContextRegs::CbColor0Base:
|
case ContextRegs::CbColor0Base:
|
||||||
[[fallthrough]];
|
|
||||||
case ContextRegs::CbColor1Base:
|
case ContextRegs::CbColor1Base:
|
||||||
[[fallthrough]];
|
|
||||||
case ContextRegs::CbColor2Base:
|
case ContextRegs::CbColor2Base:
|
||||||
[[fallthrough]];
|
|
||||||
case ContextRegs::CbColor3Base:
|
case ContextRegs::CbColor3Base:
|
||||||
[[fallthrough]];
|
|
||||||
case ContextRegs::CbColor4Base:
|
case ContextRegs::CbColor4Base:
|
||||||
[[fallthrough]];
|
|
||||||
case ContextRegs::CbColor5Base:
|
case ContextRegs::CbColor5Base:
|
||||||
[[fallthrough]];
|
|
||||||
case ContextRegs::CbColor6Base:
|
case ContextRegs::CbColor6Base:
|
||||||
[[fallthrough]];
|
|
||||||
case ContextRegs::CbColor7Base: {
|
case ContextRegs::CbColor7Base: {
|
||||||
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
|
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
|
||||||
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
|
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
|
||||||
|
@ -227,6 +220,26 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case ContextRegs::CbColor0Cmask:
|
||||||
|
case ContextRegs::CbColor1Cmask:
|
||||||
|
case ContextRegs::CbColor2Cmask:
|
||||||
|
case ContextRegs::CbColor3Cmask:
|
||||||
|
case ContextRegs::CbColor4Cmask:
|
||||||
|
case ContextRegs::CbColor5Cmask:
|
||||||
|
case ContextRegs::CbColor6Cmask:
|
||||||
|
case ContextRegs::CbColor7Cmask: {
|
||||||
|
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Cmask) /
|
||||||
|
(ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
|
||||||
|
ASSERT(col_buf_id < NumColorBuffers);
|
||||||
|
|
||||||
|
const auto nop_offset = header->type3.count;
|
||||||
|
if (nop_offset == 0x04) {
|
||||||
|
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
||||||
|
"NOP hint is missing in CB setup sequence");
|
||||||
|
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case ContextRegs::DbZInfo: {
|
case ContextRegs::DbZInfo: {
|
||||||
if (header->type3.count == 8) {
|
if (header->type3.count == 8) {
|
||||||
ASSERT_MSG(payload[20] == 0xc0001000,
|
ASSERT_MSG(payload[20] == 0xc0001000,
|
||||||
|
@ -267,7 +280,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
regs.num_indices = draw_index->index_count;
|
regs.num_indices = draw_index->index_count;
|
||||||
regs.draw_initiator = draw_index->draw_initiator;
|
regs.draw_initiator = draw_index->draw_initiator;
|
||||||
if (rasterizer) {
|
if (rasterizer) {
|
||||||
|
rasterizer->ScopeMarkerBegin(
|
||||||
|
fmt::format("dcb:{}:DrawIndex2", reinterpret_cast<const void*>(dcb.data())));
|
||||||
rasterizer->Draw(true);
|
rasterizer->Draw(true);
|
||||||
|
rasterizer->ScopeMarkerEnd();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -277,7 +293,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
regs.num_indices = draw_index_off->index_count;
|
regs.num_indices = draw_index_off->index_count;
|
||||||
regs.draw_initiator = draw_index_off->draw_initiator;
|
regs.draw_initiator = draw_index_off->draw_initiator;
|
||||||
if (rasterizer) {
|
if (rasterizer) {
|
||||||
|
rasterizer->ScopeMarkerBegin(fmt::format(
|
||||||
|
"dcb:{}:DrawIndexOffset2", reinterpret_cast<const void*>(dcb.data())));
|
||||||
rasterizer->Draw(true, draw_index_off->index_offset);
|
rasterizer->Draw(true, draw_index_off->index_offset);
|
||||||
|
rasterizer->ScopeMarkerEnd();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -286,7 +305,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
regs.num_indices = draw_index->index_count;
|
regs.num_indices = draw_index->index_count;
|
||||||
regs.draw_initiator = draw_index->draw_initiator;
|
regs.draw_initiator = draw_index->draw_initiator;
|
||||||
if (rasterizer) {
|
if (rasterizer) {
|
||||||
|
rasterizer->ScopeMarkerBegin(
|
||||||
|
fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast<const void*>(dcb.data())));
|
||||||
rasterizer->Draw(false);
|
rasterizer->Draw(false);
|
||||||
|
rasterizer->ScopeMarkerEnd();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -297,7 +319,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||||
|
rasterizer->ScopeMarkerBegin(
|
||||||
|
fmt::format("dcb:{}:Dispatch", reinterpret_cast<const void*>(dcb.data())));
|
||||||
rasterizer->DispatchDirect();
|
rasterizer->DispatchDirect();
|
||||||
|
rasterizer->ScopeMarkerEnd();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -387,7 +412,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
TracyFiberLeave;
|
TracyFiberLeave;
|
||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
||||||
TracyFiberEnter(acb_task_name);
|
TracyFiberEnter(acb_task_name);
|
||||||
|
|
||||||
while (!acb.empty()) {
|
while (!acb.empty()) {
|
||||||
|
@ -408,8 +433,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::IndirectBuffer: {
|
case PM4ItOpcode::IndirectBuffer: {
|
||||||
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
|
||||||
auto task =
|
auto task = ProcessCompute(
|
||||||
ProcessCompute({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
|
{indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
|
||||||
while (!task.handle.done()) {
|
while (!task.handle.done()) {
|
||||||
task.handle.resume();
|
task.handle.resume();
|
||||||
|
|
||||||
|
@ -435,7 +460,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
||||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||||
|
rasterizer->ScopeMarkerBegin(fmt::format(
|
||||||
|
"acb[{}]:{}:Dispatch", vqid, reinterpret_cast<const void*>(acb.data())));
|
||||||
rasterizer->DispatchDirect();
|
rasterizer->DispatchDirect();
|
||||||
|
rasterizer->ScopeMarkerEnd();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -495,7 +523,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
||||||
ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
||||||
auto& queue = mapped_queues[vqid];
|
auto& queue = mapped_queues[vqid];
|
||||||
|
|
||||||
const auto& task = ProcessCompute(acb);
|
const auto& task = ProcessCompute(acb, vqid);
|
||||||
{
|
{
|
||||||
std::unique_lock lock{queue.m_access};
|
std::unique_lock lock{queue.m_access};
|
||||||
queue.submits.emplace(task.handle);
|
queue.submits.emplace(task.handle);
|
||||||
|
|
|
@ -80,6 +80,7 @@ struct Liverpool {
|
||||||
union {
|
union {
|
||||||
BitField<0, 6, u64> num_vgprs;
|
BitField<0, 6, u64> num_vgprs;
|
||||||
BitField<6, 4, u64> num_sgprs;
|
BitField<6, 4, u64> num_sgprs;
|
||||||
|
BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs
|
||||||
BitField<33, 5, u64> num_user_regs;
|
BitField<33, 5, u64> num_user_regs;
|
||||||
} settings;
|
} settings;
|
||||||
UserData user_data;
|
UserData user_data;
|
||||||
|
@ -785,6 +786,14 @@ struct Liverpool {
|
||||||
CbColor5Base = 0xA363,
|
CbColor5Base = 0xA363,
|
||||||
CbColor6Base = 0xA372,
|
CbColor6Base = 0xA372,
|
||||||
CbColor7Base = 0xA381,
|
CbColor7Base = 0xA381,
|
||||||
|
CbColor0Cmask = 0xA31F,
|
||||||
|
CbColor1Cmask = 0xA32E,
|
||||||
|
CbColor2Cmask = 0xA33D,
|
||||||
|
CbColor3Cmask = 0xA34C,
|
||||||
|
CbColor4Cmask = 0xA35B,
|
||||||
|
CbColor5Cmask = 0xA36A,
|
||||||
|
CbColor6Cmask = 0xA379,
|
||||||
|
CbColor7Cmask = 0xA388,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PolygonOffset {
|
struct PolygonOffset {
|
||||||
|
@ -979,7 +988,7 @@ private:
|
||||||
|
|
||||||
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
|
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||||
Task ProcessCeUpdate(std::span<const u32> ccb);
|
Task ProcessCeUpdate(std::span<const u32> ccb);
|
||||||
Task ProcessCompute(std::span<const u32> acb);
|
Task ProcessCompute(std::span<const u32> acb, int vqid);
|
||||||
|
|
||||||
void Process(std::stop_token stoken);
|
void Process(std::stop_token stoken);
|
||||||
|
|
||||||
|
|
|
@ -72,6 +72,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case Shader::Stage::Vertex: {
|
case Shader::Stage::Vertex: {
|
||||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||||
|
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
||||||
BuildVsOutputs(info, regs.vs_output_control);
|
BuildVsOutputs(info, regs.vs_output_control);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -254,4 +254,16 @@ void Rasterizer::UpdateDepthStencilState() {
|
||||||
cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable);
|
cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Rasterizer::ScopeMarkerBegin(const std::string& str) {
|
||||||
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
|
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||||
|
.pLabelName = str.c_str(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rasterizer::ScopeMarkerEnd() {
|
||||||
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
|
cmdbuf.endDebugUtilsLabelEXT();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -33,6 +33,9 @@ public:
|
||||||
|
|
||||||
void DispatchDirect();
|
void DispatchDirect();
|
||||||
|
|
||||||
|
void ScopeMarkerBegin(const std::string& str);
|
||||||
|
void ScopeMarkerEnd();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
|
u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
|
||||||
void MapMemory(VAddr addr, size_t size);
|
void MapMemory(VAddr addr, size_t size);
|
||||||
|
|
Loading…
Reference in New Issue