From c37679154eeb59e6bfe73ec2ff99b15096094942 Mon Sep 17 00:00:00 2001 From: Random <28494085+Random06457@users.noreply.github.com> Date: Wed, 28 Aug 2024 09:53:27 +0200 Subject: [PATCH] Handle PM4 type-2 packets (#556) * video_core: handle PM4 type-2 packets * video_core: rewrite pm4 command type handling into a switch statement --- src/video_core/amdgpu/liverpool.cpp | 563 ++++++++++++++-------------- 1 file changed, 287 insertions(+), 276 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 5b3db603..7e6ca14d 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -175,294 +175,305 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(dcb.data()); const u32 type = header->type; - if (type != 3) { - // No other types of packets were spotted so far - UNREACHABLE_MSG("Invalid PM4 type {}", type); - } - const u32 count = header->type3.NumWords(); - const PM4ItOpcode opcode = header->type3.opcode; - switch (opcode) { - case PM4ItOpcode::Nop: { - const auto* nop = reinterpret_cast(header); - if (nop->header.count.Value() == 0) { - break; - } + switch (type) { + case 0: + case 1: + UNREACHABLE_MSG("Unsupported PM4 type {}", type); + break; + case 2: + // Type-2 packets are used for padding purposes + dcb = dcb.subspan(1); + continue; + case 3: + const u32 count = header->type3.NumWords(); + const PM4ItOpcode opcode = header->type3.opcode; + switch (opcode) { + case PM4ItOpcode::Nop: { + const auto* nop = reinterpret_cast(header); + if (nop->header.count.Value() == 0) { + break; + } - switch (nop->data_block[0]) { - case PM4CmdNop::PayloadType::PatchedFlip: { - // There is no evidence that GPU CP drives flip events by parsing - // special NOP packets. For convenience lets assume that it does.
- Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip); + switch (nop->data_block[0]) { + case PM4CmdNop::PayloadType::PatchedFlip: { + // There is no evidence that GPU CP drives flip events by parsing + // special NOP packets. For convenience lets assume that it does. + Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip); + break; + } + case PM4CmdNop::PayloadType::DebugMarkerPush: { + const auto marker_sz = nop->header.count.Value() * 2; + const std::string_view label{reinterpret_cast(&nop->data_block[1]), + marker_sz}; + rasterizer->ScopeMarkerBegin(label); + break; + } + case PM4CmdNop::PayloadType::DebugMarkerPop: { + rasterizer->ScopeMarkerEnd(); + break; + } + default: + break; + } break; } - case PM4CmdNop::PayloadType::DebugMarkerPush: { - const auto marker_sz = nop->header.count.Value() * 2; - const std::string_view label{reinterpret_cast(&nop->data_block[1]), - marker_sz}; - rasterizer->ScopeMarkerBegin(label); + case PM4ItOpcode::ContextControl: { break; } - case PM4CmdNop::PayloadType::DebugMarkerPop: { - rasterizer->ScopeMarkerEnd(); + case PM4ItOpcode::ClearState: { + regs.SetDefaults(); + break; + } + case PM4ItOpcode::SetConfigReg: { + const auto* set_data = reinterpret_cast(header); + const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset; + const auto* payload = reinterpret_cast(header + 2); + std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); + break; + } + case PM4ItOpcode::SetContextReg: { + const auto* set_data = reinterpret_cast(header); + const auto reg_addr = ContextRegWordOffset + set_data->reg_offset; + const auto* payload = reinterpret_cast(header + 2); + + std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); + + // In the case of HW, render target memory has alignment as color block operates on + // tiles. 
There is no information of actual resource extents stored in CB context + // regs, so any deduction of it from slices/pitch will lead to a larger surface + // created. The same applies to the depth targets. Fortunately, the guest always + // sends a trailing NOP packet right after the context regs setup, so we can use the + // heuristic below and extract the hint to determine actual resource dims. + + switch (reg_addr) { + case ContextRegs::CbColor0Base: + case ContextRegs::CbColor1Base: + case ContextRegs::CbColor2Base: + case ContextRegs::CbColor3Base: + case ContextRegs::CbColor4Base: + case ContextRegs::CbColor5Base: + case ContextRegs::CbColor6Base: + case ContextRegs::CbColor7Base: { + const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) / + (ContextRegs::CbColor1Base - ContextRegs::CbColor0Base); + ASSERT(col_buf_id < NumColorBuffers); + + const auto nop_offset = header->type3.count; + if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { + ASSERT_MSG(payload[nop_offset] == 0xc0001000, + "NOP hint is missing in CB setup sequence"); + last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; + } else { + last_cb_extent[col_buf_id].raw = 0; + } + break; + } + case ContextRegs::CbColor0Cmask: + case ContextRegs::CbColor1Cmask: + case ContextRegs::CbColor2Cmask: + case ContextRegs::CbColor3Cmask: + case ContextRegs::CbColor4Cmask: + case ContextRegs::CbColor5Cmask: + case ContextRegs::CbColor6Cmask: + case ContextRegs::CbColor7Cmask: { + const auto col_buf_id = + (reg_addr - ContextRegs::CbColor0Cmask) / + (ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask); + ASSERT(col_buf_id < NumColorBuffers); + + const auto nop_offset = header->type3.count; + if (nop_offset == 0x04) { + ASSERT_MSG(payload[nop_offset] == 0xc0001000, + "NOP hint is missing in CB setup sequence"); + last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; + } + break; + } + case ContextRegs::DbZInfo: { + if (header->type3.count == 8) { + ASSERT_MSG(payload[20] 
== 0xc0001000, + "NOP hint is missing in DB setup sequence"); + last_db_extent.raw = payload[21]; + } else { + last_db_extent.raw = 0; + } + break; + } + default: + break; + } + break; + } + case PM4ItOpcode::SetShReg: { + const auto* set_data = reinterpret_cast(header); + std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + (count - 1) * sizeof(u32)); + break; + } + case PM4ItOpcode::SetUconfigReg: { + const auto* set_data = reinterpret_cast(header); + std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset], + header + 2, (count - 1) * sizeof(u32)); + break; + } + case PM4ItOpcode::IndexType: { + const auto* index_type = reinterpret_cast(header); + regs.index_buffer_type.raw = index_type->raw; + break; + } + case PM4ItOpcode::DrawIndex2: { + const auto* draw_index = reinterpret_cast(header); + regs.max_index_size = draw_index->max_size; + regs.index_base_address.base_addr_lo = draw_index->index_base_lo; + regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); + regs.num_indices = draw_index->index_count; + regs.draw_initiator = draw_index->draw_initiator; + if (rasterizer) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); + rasterizer->Draw(true); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case PM4ItOpcode::DrawIndexOffset2: { + const auto* draw_index_off = + reinterpret_cast(header); + regs.max_index_size = draw_index_off->max_size; + regs.num_indices = draw_index_off->index_count; + regs.draw_initiator = draw_index_off->draw_initiator; + if (rasterizer) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin( + fmt::format("dcb:{}:DrawIndexOffset2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); + rasterizer->Draw(true, draw_index_off->index_offset); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case 
PM4ItOpcode::DrawIndexAuto: { + const auto* draw_index = reinterpret_cast(header); + regs.num_indices = draw_index->index_count; + regs.draw_initiator = draw_index->draw_initiator; + if (rasterizer) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); + rasterizer->Draw(false); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case PM4ItOpcode::DispatchDirect: { + const auto* dispatch_direct = reinterpret_cast(header); + regs.cs_program.dim_x = dispatch_direct->dim_x; + regs.cs_program.dim_y = dispatch_direct->dim_y; + regs.cs_program.dim_z = dispatch_direct->dim_z; + regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); + rasterizer->DispatchDirect(); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case PM4ItOpcode::NumInstances: { + const auto* num_instances = reinterpret_cast(header); + regs.num_instances.num_instances = num_instances->num_instances; + break; + } + case PM4ItOpcode::IndexBase: { + const auto* index_base = reinterpret_cast(header); + regs.index_base_address.base_addr_lo = index_base->addr_lo; + regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi); + break; + } + case PM4ItOpcode::IndexBufferSize: { + const auto* index_size = reinterpret_cast(header); + regs.num_indices = index_size->num_indices; + break; + } + case PM4ItOpcode::EventWrite: { + // const auto* event = reinterpret_cast(header); + break; + } + case PM4ItOpcode::EventWriteEos: { + const auto* event_eos = reinterpret_cast(header); + event_eos->SignalFence(); + break; + } + case PM4ItOpcode::EventWriteEop: { + const auto* event_eop = reinterpret_cast(header); + event_eop->SignalFence(); + 
break; + } + case PM4ItOpcode::DmaData: { + const auto* dma_data = reinterpret_cast(header); + break; + } + case PM4ItOpcode::WriteData: { + const auto* write_data = reinterpret_cast(header); + ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); + const u32 data_size = (header->type3.count.Value() - 2) * 4; + u64* address = write_data->Address(); + if (!write_data->wr_one_addr.Value()) { + std::memcpy(address, write_data->data, data_size); + } else { + UNREACHABLE(); + } + break; + } + case PM4ItOpcode::AcquireMem: { + // const auto* acquire_mem = reinterpret_cast(header); + break; + } + case PM4ItOpcode::WaitRegMem: { + const auto* wait_reg_mem = reinterpret_cast(header); + // ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + // Optimization: VO label waits are special because the emulator + // will write to the label when presentation is finished. So if + // there are no other submits to yield to we can sleep the thread + // instead and allow other tasks to run. 
+ const u64* wait_addr = wait_reg_mem->Address(); + if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) { + vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); + } + while (!wait_reg_mem->Test()) { + mapped_queues[GfxQueueId].cs_state = regs.cs_program; + TracyFiberLeave; + co_yield {}; + TracyFiberEnter(dcb_task_name); + regs.cs_program = mapped_queues[GfxQueueId].cs_state; + } + break; + } + case PM4ItOpcode::IncrementDeCounter: { + ++cblock.de_count; + break; + } + case PM4ItOpcode::WaitOnCeCounter: { + while (cblock.ce_count <= cblock.de_count) { + TracyFiberLeave; + ce_task.handle.resume(); + TracyFiberEnter(dcb_task_name); + } + break; + } + case PM4ItOpcode::PfpSyncMe: { break; } default: - break; + UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", + static_cast(opcode), count); } + dcb = dcb.subspan(header->type3.NumWords() + 1); break; } - case PM4ItOpcode::ContextControl: { - break; - } - case PM4ItOpcode::ClearState: { - regs.SetDefaults(); - break; - } - case PM4ItOpcode::SetConfigReg: { - const auto* set_data = reinterpret_cast(header); - const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset; - const auto* payload = reinterpret_cast(header + 2); - std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); - break; - } - case PM4ItOpcode::SetContextReg: { - const auto* set_data = reinterpret_cast(header); - const auto reg_addr = ContextRegWordOffset + set_data->reg_offset; - const auto* payload = reinterpret_cast(header + 2); - - std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); - - // In the case of HW, render target memory has alignment as color block operates on - // tiles. There is no information of actual resource extents stored in CB context - // regs, so any deduction of it from slices/pitch will lead to a larger surface created. - // The same applies to the depth targets. 
Fortunately, the guest always sends - // a trailing NOP packet right after the context regs setup, so we can use the heuristic - // below and extract the hint to determine actual resource dims. - - switch (reg_addr) { - case ContextRegs::CbColor0Base: - case ContextRegs::CbColor1Base: - case ContextRegs::CbColor2Base: - case ContextRegs::CbColor3Base: - case ContextRegs::CbColor4Base: - case ContextRegs::CbColor5Base: - case ContextRegs::CbColor6Base: - case ContextRegs::CbColor7Base: { - const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) / - (ContextRegs::CbColor1Base - ContextRegs::CbColor0Base); - ASSERT(col_buf_id < NumColorBuffers); - - const auto nop_offset = header->type3.count; - if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { - ASSERT_MSG(payload[nop_offset] == 0xc0001000, - "NOP hint is missing in CB setup sequence"); - last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; - } else { - last_cb_extent[col_buf_id].raw = 0; - } - break; - } - case ContextRegs::CbColor0Cmask: - case ContextRegs::CbColor1Cmask: - case ContextRegs::CbColor2Cmask: - case ContextRegs::CbColor3Cmask: - case ContextRegs::CbColor4Cmask: - case ContextRegs::CbColor5Cmask: - case ContextRegs::CbColor6Cmask: - case ContextRegs::CbColor7Cmask: { - const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Cmask) / - (ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask); - ASSERT(col_buf_id < NumColorBuffers); - - const auto nop_offset = header->type3.count; - if (nop_offset == 0x04) { - ASSERT_MSG(payload[nop_offset] == 0xc0001000, - "NOP hint is missing in CB setup sequence"); - last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; - } - break; - } - case ContextRegs::DbZInfo: { - if (header->type3.count == 8) { - ASSERT_MSG(payload[20] == 0xc0001000, - "NOP hint is missing in DB setup sequence"); - last_db_extent.raw = payload[21]; - } else { - last_db_extent.raw = 0; - } - break; - } - default: - break; - } - break; - } - case 
PM4ItOpcode::SetShReg: { - const auto* set_data = reinterpret_cast(header); - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); - break; - } - case PM4ItOpcode::SetUconfigReg: { - const auto* set_data = reinterpret_cast(header); - std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); - break; - } - case PM4ItOpcode::IndexType: { - const auto* index_type = reinterpret_cast(header); - regs.index_buffer_type.raw = index_type->raw; - break; - } - case PM4ItOpcode::DrawIndex2: { - const auto* draw_index = reinterpret_cast(header); - regs.max_index_size = draw_index->max_size; - regs.index_base_address.base_addr_lo = draw_index->index_base_lo; - regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); - regs.num_indices = draw_index->index_count; - regs.draw_initiator = draw_index->draw_initiator; - if (rasterizer) { - const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); - rasterizer->Draw(true); - rasterizer->ScopeMarkerEnd(); - } - break; - } - case PM4ItOpcode::DrawIndexOffset2: { - const auto* draw_index_off = reinterpret_cast(header); - regs.max_index_size = draw_index_off->max_size; - regs.num_indices = draw_index_off->index_count; - regs.draw_initiator = draw_index_off->draw_initiator; - if (rasterizer) { - const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); - rasterizer->Draw(true, draw_index_off->index_offset); - rasterizer->ScopeMarkerEnd(); - } - break; - } - case PM4ItOpcode::DrawIndexAuto: { - const auto* draw_index = reinterpret_cast(header); - regs.num_indices = draw_index->index_count; - regs.draw_initiator = draw_index->draw_initiator; - if (rasterizer) { - const auto cmd_address 
= reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); - rasterizer->Draw(false); - rasterizer->ScopeMarkerEnd(); - } - break; - } - case PM4ItOpcode::DispatchDirect: { - const auto* dispatch_direct = reinterpret_cast(header); - regs.cs_program.dim_x = dispatch_direct->dim_x; - regs.cs_program.dim_y = dispatch_direct->dim_y; - regs.cs_program.dim_z = dispatch_direct->dim_z; - regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; - if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); - rasterizer->Breadcrumb(u64(cmd_address)); - rasterizer->DispatchDirect(); - rasterizer->ScopeMarkerEnd(); - } - break; - } - case PM4ItOpcode::NumInstances: { - const auto* num_instances = reinterpret_cast(header); - regs.num_instances.num_instances = num_instances->num_instances; - break; - } - case PM4ItOpcode::IndexBase: { - const auto* index_base = reinterpret_cast(header); - regs.index_base_address.base_addr_lo = index_base->addr_lo; - regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi); - break; - } - case PM4ItOpcode::IndexBufferSize: { - const auto* index_size = reinterpret_cast(header); - regs.num_indices = index_size->num_indices; - break; - } - case PM4ItOpcode::EventWrite: { - // const auto* event = reinterpret_cast(header); - break; - } - case PM4ItOpcode::EventWriteEos: { - const auto* event_eos = reinterpret_cast(header); - event_eos->SignalFence(); - break; - } - case PM4ItOpcode::EventWriteEop: { - const auto* event_eop = reinterpret_cast(header); - event_eop->SignalFence(); - break; - } - case PM4ItOpcode::DmaData: { - const auto* dma_data = reinterpret_cast(header); - break; - } - case PM4ItOpcode::WriteData: { - const auto* write_data = reinterpret_cast(header); - ASSERT(write_data->dst_sel.Value() 
== 2 || write_data->dst_sel.Value() == 5); - const u32 data_size = (header->type3.count.Value() - 2) * 4; - u64* address = write_data->Address(); - if (!write_data->wr_one_addr.Value()) { - std::memcpy(address, write_data->data, data_size); - } else { - UNREACHABLE(); - } - break; - } - case PM4ItOpcode::AcquireMem: { - // const auto* acquire_mem = reinterpret_cast(header); - break; - } - case PM4ItOpcode::WaitRegMem: { - const auto* wait_reg_mem = reinterpret_cast(header); - // ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); - // Optimization: VO label waits are special because the emulator - // will write to the label when presentation is finished. So if - // there are no other submits to yield to we can sleep the thread - // instead and allow other tasks to run. - const u64* wait_addr = wait_reg_mem->Address(); - if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) { - vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); - } - while (!wait_reg_mem->Test()) { - mapped_queues[GfxQueueId].cs_state = regs.cs_program; - TracyFiberLeave; - co_yield {}; - TracyFiberEnter(dcb_task_name); - regs.cs_program = mapped_queues[GfxQueueId].cs_state; - } - break; - } - case PM4ItOpcode::IncrementDeCounter: { - ++cblock.de_count; - break; - } - case PM4ItOpcode::WaitOnCeCounter: { - while (cblock.ce_count <= cblock.de_count) { - TracyFiberLeave; - ce_task.handle.resume(); - TracyFiberEnter(dcb_task_name); - } - break; - } - case PM4ItOpcode::PfpSyncMe: { - break; - } - default: - UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", - static_cast(opcode), count); - } - dcb = dcb.subspan(header->type3.NumWords() + 1); } if (ce_task.handle) {