Handle PM4 type-2 packets (#556)
* video_core: handle PM4 type-2 packets * video_core: rewrite pm4 comand type handling into a switch statement
This commit is contained in:
parent
e4254ebdaa
commit
c37679154e
|
@ -175,294 +175,305 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
while (!dcb.empty()) {
|
||||
const auto* header = reinterpret_cast<const PM4Header*>(dcb.data());
|
||||
const u32 type = header->type;
|
||||
if (type != 3) {
|
||||
// No other types of packets were spotted so far
|
||||
UNREACHABLE_MSG("Invalid PM4 type {}", type);
|
||||
}
|
||||
|
||||
const u32 count = header->type3.NumWords();
|
||||
const PM4ItOpcode opcode = header->type3.opcode;
|
||||
switch (opcode) {
|
||||
case PM4ItOpcode::Nop: {
|
||||
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
|
||||
if (nop->header.count.Value() == 0) {
|
||||
break;
|
||||
}
|
||||
switch (type) {
|
||||
case 0:
|
||||
case 1:
|
||||
UNREACHABLE_MSG("Unsupported PM4 type {}", type);
|
||||
break;
|
||||
case 2:
|
||||
// Type-2 packet are used for padding purposes
|
||||
dcb = dcb.subspan(1);
|
||||
continue;
|
||||
case 3:
|
||||
const u32 count = header->type3.NumWords();
|
||||
const PM4ItOpcode opcode = header->type3.opcode;
|
||||
switch (opcode) {
|
||||
case PM4ItOpcode::Nop: {
|
||||
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
|
||||
if (nop->header.count.Value() == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
switch (nop->data_block[0]) {
|
||||
case PM4CmdNop::PayloadType::PatchedFlip: {
|
||||
// There is no evidence that GPU CP drives flip events by parsing
|
||||
// special NOP packets. For convenience lets assume that it does.
|
||||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
|
||||
switch (nop->data_block[0]) {
|
||||
case PM4CmdNop::PayloadType::PatchedFlip: {
|
||||
// There is no evidence that GPU CP drives flip events by parsing
|
||||
// special NOP packets. For convenience lets assume that it does.
|
||||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
|
||||
break;
|
||||
}
|
||||
case PM4CmdNop::PayloadType::DebugMarkerPush: {
|
||||
const auto marker_sz = nop->header.count.Value() * 2;
|
||||
const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
|
||||
marker_sz};
|
||||
rasterizer->ScopeMarkerBegin(label);
|
||||
break;
|
||||
}
|
||||
case PM4CmdNop::PayloadType::DebugMarkerPop: {
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4CmdNop::PayloadType::DebugMarkerPush: {
|
||||
const auto marker_sz = nop->header.count.Value() * 2;
|
||||
const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
|
||||
marker_sz};
|
||||
rasterizer->ScopeMarkerBegin(label);
|
||||
case PM4ItOpcode::ContextControl: {
|
||||
break;
|
||||
}
|
||||
case PM4CmdNop::PayloadType::DebugMarkerPop: {
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
case PM4ItOpcode::ClearState: {
|
||||
regs.SetDefaults();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetConfigReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
|
||||
const auto* payload = reinterpret_cast<const u32*>(header + 2);
|
||||
std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetContextReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
|
||||
const auto* payload = reinterpret_cast<const u32*>(header + 2);
|
||||
|
||||
std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
|
||||
|
||||
// In the case of HW, render target memory has alignment as color block operates on
|
||||
// tiles. There is no information of actual resource extents stored in CB context
|
||||
// regs, so any deduction of it from slices/pitch will lead to a larger surface
|
||||
// created. The same applies to the depth targets. Fortunately, the guest always
|
||||
// sends a trailing NOP packet right after the context regs setup, so we can use the
|
||||
// heuristic below and extract the hint to determine actual resource dims.
|
||||
|
||||
switch (reg_addr) {
|
||||
case ContextRegs::CbColor0Base:
|
||||
case ContextRegs::CbColor1Base:
|
||||
case ContextRegs::CbColor2Base:
|
||||
case ContextRegs::CbColor3Base:
|
||||
case ContextRegs::CbColor4Base:
|
||||
case ContextRegs::CbColor5Base:
|
||||
case ContextRegs::CbColor6Base:
|
||||
case ContextRegs::CbColor7Base: {
|
||||
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
|
||||
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
|
||||
ASSERT(col_buf_id < NumColorBuffers);
|
||||
|
||||
const auto nop_offset = header->type3.count;
|
||||
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
|
||||
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
||||
"NOP hint is missing in CB setup sequence");
|
||||
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
||||
} else {
|
||||
last_cb_extent[col_buf_id].raw = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ContextRegs::CbColor0Cmask:
|
||||
case ContextRegs::CbColor1Cmask:
|
||||
case ContextRegs::CbColor2Cmask:
|
||||
case ContextRegs::CbColor3Cmask:
|
||||
case ContextRegs::CbColor4Cmask:
|
||||
case ContextRegs::CbColor5Cmask:
|
||||
case ContextRegs::CbColor6Cmask:
|
||||
case ContextRegs::CbColor7Cmask: {
|
||||
const auto col_buf_id =
|
||||
(reg_addr - ContextRegs::CbColor0Cmask) /
|
||||
(ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
|
||||
ASSERT(col_buf_id < NumColorBuffers);
|
||||
|
||||
const auto nop_offset = header->type3.count;
|
||||
if (nop_offset == 0x04) {
|
||||
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
||||
"NOP hint is missing in CB setup sequence");
|
||||
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ContextRegs::DbZInfo: {
|
||||
if (header->type3.count == 8) {
|
||||
ASSERT_MSG(payload[20] == 0xc0001000,
|
||||
"NOP hint is missing in DB setup sequence");
|
||||
last_db_extent.raw = payload[21];
|
||||
} else {
|
||||
last_db_extent.raw = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetShReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetUconfigReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset],
|
||||
header + 2, (count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexType: {
|
||||
const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
|
||||
regs.index_buffer_type.raw = index_type->raw;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndex2: {
|
||||
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
|
||||
regs.max_index_size = draw_index->max_size;
|
||||
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
|
||||
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
|
||||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(true);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndexOffset2: {
|
||||
const auto* draw_index_off =
|
||||
reinterpret_cast<const PM4CmdDrawIndexOffset2*>(header);
|
||||
regs.max_index_size = draw_index_off->max_size;
|
||||
regs.num_indices = draw_index_off->index_count;
|
||||
regs.draw_initiator = draw_index_off->draw_initiator;
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DrawIndexOffset2", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(true, draw_index_off->index_offset);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndexAuto: {
|
||||
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
|
||||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(false);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchDirect: {
|
||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->DispatchDirect();
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::NumInstances: {
|
||||
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
|
||||
regs.num_instances.num_instances = num_instances->num_instances;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexBase: {
|
||||
const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
|
||||
regs.index_base_address.base_addr_lo = index_base->addr_lo;
|
||||
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexBufferSize: {
|
||||
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
|
||||
regs.num_indices = index_size->num_indices;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEos: {
|
||||
const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
|
||||
event_eos->SignalFence();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEop: {
|
||||
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
|
||||
event_eop->SignalFence();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DmaData: {
|
||||
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WriteData: {
|
||||
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
|
||||
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||
u64* address = write_data->Address<u64*>();
|
||||
if (!write_data->wr_one_addr.Value()) {
|
||||
std::memcpy(address, write_data->data, data_size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::AcquireMem: {
|
||||
// const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
// ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
// Optimization: VO label waits are special because the emulator
|
||||
// will write to the label when presentation is finished. So if
|
||||
// there are no other submits to yield to we can sleep the thread
|
||||
// instead and allow other tasks to run.
|
||||
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
||||
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
|
||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||
}
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IncrementDeCounter: {
|
||||
++cblock.de_count;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WaitOnCeCounter: {
|
||||
while (cblock.ce_count <= cblock.de_count) {
|
||||
TracyFiberLeave;
|
||||
ce_task.handle.resume();
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::PfpSyncMe: {
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
|
||||
static_cast<u32>(opcode), count);
|
||||
}
|
||||
dcb = dcb.subspan(header->type3.NumWords() + 1);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::ContextControl: {
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::ClearState: {
|
||||
regs.SetDefaults();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetConfigReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
|
||||
const auto* payload = reinterpret_cast<const u32*>(header + 2);
|
||||
std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetContextReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
|
||||
const auto* payload = reinterpret_cast<const u32*>(header + 2);
|
||||
|
||||
std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
|
||||
|
||||
// In the case of HW, render target memory has alignment as color block operates on
|
||||
// tiles. There is no information of actual resource extents stored in CB context
|
||||
// regs, so any deduction of it from slices/pitch will lead to a larger surface created.
|
||||
// The same applies to the depth targets. Fortunately, the guest always sends
|
||||
// a trailing NOP packet right after the context regs setup, so we can use the heuristic
|
||||
// below and extract the hint to determine actual resource dims.
|
||||
|
||||
switch (reg_addr) {
|
||||
case ContextRegs::CbColor0Base:
|
||||
case ContextRegs::CbColor1Base:
|
||||
case ContextRegs::CbColor2Base:
|
||||
case ContextRegs::CbColor3Base:
|
||||
case ContextRegs::CbColor4Base:
|
||||
case ContextRegs::CbColor5Base:
|
||||
case ContextRegs::CbColor6Base:
|
||||
case ContextRegs::CbColor7Base: {
|
||||
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
|
||||
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
|
||||
ASSERT(col_buf_id < NumColorBuffers);
|
||||
|
||||
const auto nop_offset = header->type3.count;
|
||||
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
|
||||
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
||||
"NOP hint is missing in CB setup sequence");
|
||||
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
||||
} else {
|
||||
last_cb_extent[col_buf_id].raw = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ContextRegs::CbColor0Cmask:
|
||||
case ContextRegs::CbColor1Cmask:
|
||||
case ContextRegs::CbColor2Cmask:
|
||||
case ContextRegs::CbColor3Cmask:
|
||||
case ContextRegs::CbColor4Cmask:
|
||||
case ContextRegs::CbColor5Cmask:
|
||||
case ContextRegs::CbColor6Cmask:
|
||||
case ContextRegs::CbColor7Cmask: {
|
||||
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Cmask) /
|
||||
(ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
|
||||
ASSERT(col_buf_id < NumColorBuffers);
|
||||
|
||||
const auto nop_offset = header->type3.count;
|
||||
if (nop_offset == 0x04) {
|
||||
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
||||
"NOP hint is missing in CB setup sequence");
|
||||
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ContextRegs::DbZInfo: {
|
||||
if (header->type3.count == 8) {
|
||||
ASSERT_MSG(payload[20] == 0xc0001000,
|
||||
"NOP hint is missing in DB setup sequence");
|
||||
last_db_extent.raw = payload[21];
|
||||
} else {
|
||||
last_db_extent.raw = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetShReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetUconfigReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexType: {
|
||||
const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
|
||||
regs.index_buffer_type.raw = index_type->raw;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndex2: {
|
||||
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
|
||||
regs.max_index_size = draw_index->max_size;
|
||||
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
|
||||
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
|
||||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(true);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndexOffset2: {
|
||||
const auto* draw_index_off = reinterpret_cast<const PM4CmdDrawIndexOffset2*>(header);
|
||||
regs.max_index_size = draw_index_off->max_size;
|
||||
regs.num_indices = draw_index_off->index_count;
|
||||
regs.draw_initiator = draw_index_off->draw_initiator;
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(true, draw_index_off->index_offset);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DrawIndexAuto: {
|
||||
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
|
||||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(false);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchDirect: {
|
||||
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
|
||||
regs.cs_program.dim_x = dispatch_direct->dim_x;
|
||||
regs.cs_program.dim_y = dispatch_direct->dim_y;
|
||||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->DispatchDirect();
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::NumInstances: {
|
||||
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
|
||||
regs.num_instances.num_instances = num_instances->num_instances;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexBase: {
|
||||
const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
|
||||
regs.index_base_address.base_addr_lo = index_base->addr_lo;
|
||||
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexBufferSize: {
|
||||
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
|
||||
regs.num_indices = index_size->num_indices;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEos: {
|
||||
const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
|
||||
event_eos->SignalFence();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEop: {
|
||||
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
|
||||
event_eop->SignalFence();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DmaData: {
|
||||
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WriteData: {
|
||||
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
|
||||
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||
u64* address = write_data->Address<u64*>();
|
||||
if (!write_data->wr_one_addr.Value()) {
|
||||
std::memcpy(address, write_data->data, data_size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::AcquireMem: {
|
||||
// const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
// ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
// Optimization: VO label waits are special because the emulator
|
||||
// will write to the label when presentation is finished. So if
|
||||
// there are no other submits to yield to we can sleep the thread
|
||||
// instead and allow other tasks to run.
|
||||
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
||||
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
|
||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||
}
|
||||
while (!wait_reg_mem->Test()) {
|
||||
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IncrementDeCounter: {
|
||||
++cblock.de_count;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::WaitOnCeCounter: {
|
||||
while (cblock.ce_count <= cblock.de_count) {
|
||||
TracyFiberLeave;
|
||||
ce_task.handle.resume();
|
||||
TracyFiberEnter(dcb_task_name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::PfpSyncMe: {
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
|
||||
static_cast<u32>(opcode), count);
|
||||
}
|
||||
dcb = dcb.subspan(header->type3.NumWords() + 1);
|
||||
}
|
||||
|
||||
if (ce_task.handle) {
|
||||
|
|
Loading…
Reference in New Issue