From 1b94f07a6a00e4e72fa543cc8f634ffeff0b485e Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 12 Jul 2024 18:12:06 +0200 Subject: [PATCH 1/7] recompiler: proper VS inputs initialization --- .../spirv/emit_spirv_context_get_set.cpp | 19 ++++++++++++++----- .../frontend/translate/translate.cpp | 16 +++++++++++++--- src/shader_recompiler/ir/attribute.h | 2 ++ src/shader_recompiler/runtime_info.h | 1 + src/video_core/amdgpu/liverpool.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 6 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 157023b6..75ee3ae9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -131,6 +131,13 @@ Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) { return ctx.OpBitcast(ctx.U32[1], EmitReadConstBuffer(ctx, handle, index)); } +Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { + return ctx.OpLoad( + ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), + ctx.instance_step_rates, + rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value)); +} + Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; @@ -149,11 +156,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpLoad(param.component_type, param.id); } } else { - const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value; - const auto step_rate = ctx.OpLoad( - ctx.U32[1], - ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), - ctx.instance_step_rates, rate_idx)); + const auto step_rate = EmitReadStepRate(ctx, param.id.value); const auto offset = ctx.OpIAdd( ctx.U32[1], ctx.OpIMul( @@ -182,6 +185,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { switch (attr) { case IR::Attribute::VertexId: return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); + case IR::Attribute::InstanceId: + return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + case IR::Attribute::InstanceId0: + return EmitReadStepRate(ctx, 0); + case IR::Attribute::InstanceId1: + return EmitReadStepRate(ctx, 1); case IR::Attribute::WorkgroupId: return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp); case IR::Attribute::LocalInvocationId: diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 4ec4128a..31d5f0f1 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -35,10 +35,20 @@ void Translator::EmitPrologue() { IR::VectorReg dst_vreg = IR::VectorReg::V0; switch (info.stage) { case Stage::Vertex: - // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23 + // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); - ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); - ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + // v1: instance ID, step rate 0 + if (info.num_input_vgprs > 0) { + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0)); + } + // v2: instance ID, step rate 1 + if (info.num_input_vgprs > 1) { + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1)); + } + // v3: instance ID, plain + if (info.num_input_vgprs > 2) { + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); + } break; case Stage::Fragment: // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L258 diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 0cfbc421..3f95ff7a 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,6 +72,8 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, + InstanceId0 = 78, // step rate 0 + InstanceId1 = 79, // step rate 1 Max, }; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index ce3b64fc..054faafe 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -163,6 +163,7 @@ struct Info { std::array workgroup_size{}; u32 num_user_data; + u32 num_input_vgprs; std::span user_data; Stage stage; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 536167ff..bcb609e2 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -80,6 +80,7 @@ struct Liverpool { union { BitField<0, 6, u64> num_vgprs; BitField<6, 4, u64> num_sgprs; + BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs BitField<33, 5, u64> num_user_regs; } settings; UserData user_data; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bf4bbc10..84eea78c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -72,6 +72,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_d switch (stage) { case Shader::Stage::Vertex: { info.num_user_data = regs.vs_program.settings.num_user_regs; + info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; BuildVsOutputs(info, regs.vs_output_control); break; } From f041276b0407d6a2de12298970d74be6d5beb0ee Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 13 Jul 2024 14:40:39 +0200 Subject: [PATCH 2/7] recompiler: added support for discard on export with masked EXEC --- .../backend/spirv/emit_spirv_instructions.h | 1 + .../backend/spirv/emit_spirv_special.cpp | 11 ++++++++++ .../frontend/control_flow_graph.cpp | 22 ++++++++++++++----- .../frontend/control_flow_graph.h | 1 + .../frontend/structured_control_flow.cpp | 1 + .../frontend/translate/export.cpp | 6 +++++ src/shader_recompiler/ir/basic_block.h | 2 ++ src/shader_recompiler/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/ir/ir_emitter.h | 1 + src/shader_recompiler/ir/microinstruction.cpp | 1 + src/shader_recompiler/ir/opcodes.inc | 1 + .../ir/passes/shader_info_collection_pass.cpp | 1 + 12 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 495ada5d..e0b19f4f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -45,6 +45,7 @@ void EmitSetVccHi(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitDiscard(EmitContext& ctx); +void EmitDiscardCond(EmitContext& ctx, Id condition); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 0ef985a9..891e41df 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -14,6 +14,17 @@ void EmitDiscard(EmitContext& ctx) { ctx.OpDemoteToHelperInvocationEXT(); } +void EmitDiscardCond(EmitContext& ctx, Id condition) { + const Id kill_label{ctx.OpLabel()}; + const Id merge_label{ctx.OpLabel()}; + ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(condition, kill_label, merge_label); + ctx.AddLabel(kill_label); + ctx.OpDemoteToHelperInvocationEXT(); + ctx.OpBranch(merge_label); + ctx.AddLabel(merge_label); +} + void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { throw NotImplementedException("Geometry streams"); } diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 03af1515..5eadae1b 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -121,7 +121,7 @@ void CFG::EmitBlocks() { void CFG::LinkBlocks() { const auto get_block = [this](u32 address) { - const auto it = blocks.find(address, Compare{}); + auto it = blocks.find(address, Compare{}); ASSERT_MSG(it != blocks.end() && it->begin == address); return &*it; }; @@ -131,7 +131,10 @@ void CFG::LinkBlocks() { // If the block doesn't end with a branch we simply // need to link with the next block. if (!end_inst.IsTerminateInstruction()) { - block.branch_true = get_block(block.end); + auto* next_block = get_block(block.end); + ++next_block->num_predecessors; + + block.branch_true = next_block; block.end_class = EndClass::Branch; continue; } @@ -141,11 +144,20 @@ void CFG::LinkBlocks() { const u32 branch_pc = block.end - end_inst.length; const u32 target_pc = end_inst.BranchTarget(branch_pc); if (end_inst.IsUnconditionalBranch()) { - block.branch_true = get_block(target_pc); + auto* target_block = get_block(target_pc); + ++target_block->num_predecessors; + + block.branch_true = target_block; block.end_class = EndClass::Branch; } else if (end_inst.IsConditionalBranch()) { - block.branch_true = get_block(target_pc); - block.branch_false = get_block(block.end); + auto* target_block = get_block(target_pc); + ++target_block->num_predecessors; + + auto* end_block = get_block(block.end); + ++end_block->num_predecessors; + + block.branch_true = target_block; + block.branch_false = end_block; block.end_class = EndClass::Branch; } else if (end_inst.opcode == Opcode::S_ENDPGM) { const auto& prev_inst = inst_list[block.end_index - 1]; diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index d343ca7d..07190087 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -36,6 +36,7 @@ struct Block : Hook { u32 end; u32 begin_index; u32 end_index; + u32 num_predecessors{}; IR::Condition cond{}; GcnInst end_inst{}; EndClass end_class{}; diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 6d78448b..346f00aa 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -631,6 +631,7 @@ private: case StatementType::Code: { ensure_block(); if (!stmt.block->is_dummy) { + current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1; const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 74aac4fb..cc631ff2 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -1,11 +1,17 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging/log.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { void Translator::EXP(const GcnInst& inst) { + if (ir.block->has_multiple_predecessors) { + LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation"); + ir.Discard(ir.LogicalNot(ir.GetExec())); + } + const auto& exp = inst.control.exp; const IR::Attribute attrib{exp.target}; const std::array vsrc = { diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 5cd36420..5a7036c6 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -149,6 +149,8 @@ public: std::array ssa_sreg_values; std::array ssa_vreg_values; + bool has_multiple_predecessors{false}; + private: /// Memory pool for instruction list ObjectPool* inst_pool; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 44128f23..5dabbb4c 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -115,6 +115,10 @@ void IREmitter::Discard() { Inst(Opcode::Discard); } +void IREmitter::Discard(const U1& cond) { + Inst(Opcode::DiscardCond, cond); +} + void IREmitter::Barrier() { Inst(Opcode::Barrier); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 51ab9d03..5d6fd714 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -42,6 +42,7 @@ public: void Prologue(); void Epilogue(); void Discard(); + void Discard(const U1& cond); void Barrier(); void WorkgroupMemoryBarrier(); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index f823980a..aa03e3d6 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -49,6 +49,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Prologue: case Opcode::Epilogue: case Opcode::Discard: + case Opcode::DiscardCond: case Opcode::SetAttribute: case Opcode::StoreBufferF32: case Opcode::StoreBufferF32x2: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index c22db3e0..94ef1784 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(PhiMove, Void, Opaq OPCODE(Prologue, Void, ) OPCODE(Epilogue, Void, ) OPCODE(Discard, Void, ) +OPCODE(DiscardCond, Void, U1, ) // Constant memory operations OPCODE(ReadConst, U32, U32x2, U32, ) diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 1cec237f..b51ce94e 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -37,6 +37,7 @@ void Visit(Info& info, IR::Inst& inst) { info.uses_group_quad = true; break; case IR::Opcode::Discard: + case IR::Opcode::DiscardCond: info.has_discard = true; break; case IR::Opcode::ImageGather: From 8144f835a9bb600ba97dfdc0440f9c6ef4892a56 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 14 Jul 2024 10:58:55 +0200 Subject: [PATCH 3/7] amdgpu: additional heuristic for CB extents detection Found in CUSA00144 --- src/video_core/amdgpu/liverpool.cpp | 27 ++++++++++++++++++++------- src/video_core/amdgpu/liverpool.h | 8 ++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 4bc73c67..590660b3 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -199,19 +199,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::spantype3.count; + if (nop_offset == 0x04) { + ASSERT_MSG(payload[nop_offset] == 0xc0001000, + "NOP hint is missing in CB setup sequence"); + last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; + } + break; + } case ContextRegs::DbZInfo: { if (header->type3.count == 8) { ASSERT_MSG(payload[20] == 0xc0001000, diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index bcb609e2..a9117867 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -786,6 +786,14 @@ struct Liverpool { CbColor5Base = 0xA363, CbColor6Base = 0xA372, CbColor7Base = 0xA381, + CbColor0Cmask = 0xA31F, + CbColor1Cmask = 0xA32E, + CbColor2Cmask = 0xA33D, + CbColor3Cmask = 0xA34C, + CbColor4Cmask = 0xA35B, + CbColor5Cmask = 0xA36A, + CbColor6Cmask = 0xA379, + CbColor7Cmask = 0xA388, }; struct PolygonOffset { From b8916787b2654d68992fe30562e5fc7ef2590615 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 14 Jul 2024 11:37:52 +0200 Subject: [PATCH 4/7] renderer: debug markers for ability to match cmdlists with rdoc captures --- src/video_core/amdgpu/liverpool.cpp | 21 ++++++++++++++++--- src/video_core/amdgpu/liverpool.h | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 12 +++++++++++ .../renderer_vulkan/vk_rasterizer.h | 3 +++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 590660b3..e5c17583 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -280,7 +280,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { + rasterizer->ScopeMarkerBegin( + fmt::format("dcb:{}:DrawIndex2", reinterpret_cast(dcb.data()))); rasterizer->Draw(true); + rasterizer->ScopeMarkerEnd(); } break; } @@ -290,7 +293,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index_off->draw_initiator; if (rasterizer) { + rasterizer->ScopeMarkerBegin(fmt::format( + "dcb:{}:DrawIndexOffset2", reinterpret_cast(dcb.data()))); rasterizer->Draw(true, draw_index_off->index_offset); + rasterizer->ScopeMarkerEnd(); } break; } @@ -299,7 +305,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { + rasterizer->ScopeMarkerBegin( + fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast(dcb.data()))); rasterizer->Draw(false); + rasterizer->ScopeMarkerEnd(); } break; } @@ -310,7 +319,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + rasterizer->ScopeMarkerBegin( + fmt::format("dcb:{}:Dispatch", reinterpret_cast(dcb.data()))); rasterizer->DispatchDirect(); + rasterizer->ScopeMarkerEnd(); } break; } @@ -421,8 +433,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb) { } case PM4ItOpcode::IndirectBuffer: { const auto* indirect_buffer = reinterpret_cast(header); - auto task = - ProcessCompute({indirect_buffer->Address(), indirect_buffer->ib_size}); + auto task = ProcessCompute( + {indirect_buffer->Address(), indirect_buffer->ib_size}, vqid); while (!task.handle.done()) { task.handle.resume(); @@ -448,7 +460,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb) { regs.cs_program.dim_z = dispatch_direct->dim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + rasterizer->ScopeMarkerBegin(fmt::format( + "acb[{}]:{}:Dispatch", vqid, reinterpret_cast(acb.data()))); rasterizer->DispatchDirect(); + rasterizer->ScopeMarkerEnd(); } break; } @@ -508,7 +523,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span acb) { ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index"); auto& queue = mapped_queues[vqid]; - const auto& task = ProcessCompute(acb); + const auto& task = ProcessCompute(acb, vqid); { std::unique_lock lock{queue.m_access}; queue.submits.emplace(task.handle); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index a9117867..0711b074 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -988,7 +988,7 @@ private: Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); - Task ProcessCompute(std::span acb); + Task ProcessCompute(std::span acb, int vqid); void Process(std::stop_token stoken); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6f8ab1a6..d0944fcc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -254,4 +254,16 @@ void Rasterizer::UpdateDepthStencilState() { cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); } +void Rasterizer::ScopeMarkerBegin(const std::string& str) { + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ + .pLabelName = str.c_str(), + }); +} + +void Rasterizer::ScopeMarkerEnd() { + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.endDebugUtilsLabelEXT(); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7bf1ab9b..aead5955 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -33,6 +33,9 @@ public: void DispatchDirect(); + void ScopeMarkerBegin(const std::string& str); + void ScopeMarkerEnd(); + private: u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset); void MapMemory(VAddr addr, size_t size); From dc50cc55fb5d89ac6d7bbd8b294bb7905abc4e35 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sun, 14 Jul 2024 17:11:54 +0200 Subject: [PATCH 5/7] missing line fix --- src/video_core/amdgpu/liverpool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index e5c17583..d36142ad 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -412,7 +412,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span acb) { +Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { TracyFiberEnter(acb_task_name); while (!acb.empty()) { From 746792eda028028e646ba7d8e681d3825baf3423 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 14 Jul 2024 18:20:31 +0200 Subject: [PATCH 6/7] libraries: kernel: added `sceKernelGetEventId` --- src/core/libraries/kernel/event_queues.cpp | 4 ++++ src/core/libraries/kernel/event_queues.h | 1 + src/core/libraries/kernel/libkernel.cpp | 1 + 3 files changed, 6 insertions(+) diff --git a/src/core/libraries/kernel/event_queues.cpp b/src/core/libraries/kernel/event_queues.cpp index aee4613c..12f59e50 100644 --- a/src/core/libraries/kernel/event_queues.cpp +++ b/src/core/libraries/kernel/event_queues.cpp @@ -185,6 +185,10 @@ void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev) { return ev->udata; } +u64 PS4_SYSV_ABI sceKernelGetEventId(const SceKernelEvent* ev) { + return ev->ident; +} + int PS4_SYSV_ABI sceKernelTriggerUserEvent(SceKernelEqueue eq, int id, void* udata) { if (eq == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; diff --git a/src/core/libraries/kernel/event_queues.h b/src/core/libraries/kernel/event_queues.h index 2549203e..0f9c42a9 100644 --- a/src/core/libraries/kernel/event_queues.h +++ b/src/core/libraries/kernel/event_queues.h @@ -15,6 +15,7 @@ int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq); int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out, SceKernelUseconds* timo); void* PS4_SYSV_ABI sceKernelGetEventUserData(const SceKernelEvent* ev); +u64 PS4_SYSV_ABI sceKernelGetEventId(const SceKernelEvent* ev); int PS4_SYSV_ABI sceKernelTriggerUserEvent(SceKernelEqueue eq, int id, void* udata); int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id); int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index 0c5b3917..9f57ff53 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -401,6 +401,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent); LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent); LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent); + LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId); // misc LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode); From 034301de887aedb64d8aaaf56d50d1655099cf09 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 14 Jul 2024 18:27:28 +0200 Subject: [PATCH 7/7] libraries: gnmdriver: added missing ASC PM4 dump --- src/core/libraries/gnmdriver/gnmdriver.cpp | 27 +++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index dcf6d99e..2e475413 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -495,8 +495,33 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr + *asc_queue.read_addr); const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr : (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr; + const std::span acb_span{acb_ptr, acb_size >> 2u}; - liverpool->SubmitAsc(vqid, {acb_ptr, acb_size >> 2u}); + if (Config::dumpPM4()) { + static auto last_frame_num = -1LL; + static u32 seq_num{}; + if (last_frame_num == frames_submitted) { + ++seq_num; + } else { + last_frame_num = frames_submitted; + seq_num = 0u; + } + + // Up to this point, all ACB submissions have been stored in a secondary command buffer. + // Dumping them using the current ring pointer would result in files containing only the + // `IndirectBuffer` command. To access the actual command stream, we need to unwrap the IB. + auto acb = acb_span; + const auto* indirect_buffer = + reinterpret_cast(acb_span.data()); + if (indirect_buffer->header.opcode == PM4ItOpcode::IndirectBuffer) { + acb = {indirect_buffer->Address(), indirect_buffer->ib_size}; + } + + // File name format is: __ + DumpCommandList(acb, std::format("acb_{}_{}", gnm_vqid, seq_num)); + } + + liverpool->SubmitAsc(vqid, acb_span); *asc_queue.read_addr += acb_size; *asc_queue.read_addr %= asc_queue.ring_size_dw * 4;