core: libraries: gnmdriver: basic functionality extension (#120)

Also a bit of refactoring in `video_core`
This commit is contained in:
psucien 2024-05-01 18:29:06 +02:00 committed by GitHub
parent 1b9bf924ca
commit 7e8d90d609
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 402 additions and 185 deletions

View File

@ -16,6 +16,20 @@ using namespace AmdGpu;
static std::unique_ptr<AmdGpu::Liverpool> liverpool; static std::unique_ptr<AmdGpu::Liverpool> liverpool;
// Precise gnm driver emulation requires emitting a set of HW-specific initialization
// commands. Supporting them is not important at this early stage and may slow down
// development, so they are switched off for now.
static constexpr bool g_fair_hw_init{false};
// Write a special ending NOP packet with a data block of `data_block_size` DWs.
// The packet occupies one header DW plus `data_block_size` body DWs; only the first
// body DW is cleared, the remaining ones are left as they are in the buffer.
// Returns the position right after the packet (header + data block).
template <u32 data_block_size>
static inline u32* WriteTrailingNop(u32* cmdbuf) {
    // The header stores `data_block_size - 1` and the first body DW is always written,
    // so an empty data block would wrap the unsigned count and write outside the packet.
    static_assert(data_block_size > 0, "trailing NOP needs at least one data DW");
    auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
    nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
    nop->data_block[0] = 0; // only one out of `data_block_size` is initialized
    return cmdbuf + data_block_size + 1 /* header */;
}
int PS4_SYSV_ABI sceGnmAddEqEvent() { int PS4_SYSV_ABI sceGnmAddEqEvent() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
@ -31,9 +45,30 @@ int PS4_SYSV_ABI sceGnmBeginWorkload() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmComputeWaitOnAddress() { s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask,
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); u32 cmp_func, u32 ref) {
return ORBIS_OK; LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 0xe)) {
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 3);
cmdbuf = WriteBody(cmdbuf, 0u);
cmdbuf += 2;
const u32 is_mem = addr > 0xffffu;
const u32 addr_mask = is_mem ? 0xfffffffcu : 0xffffu;
auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
wait_reg_mem->raw = (is_mem << 4u) | (cmp_func & 7u);
wait_reg_mem->poll_addr_lo = u32(addr & addr_mask);
wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
wait_reg_mem->ref = ref;
wait_reg_mem->mask = mask;
wait_reg_mem->poll_interval = 10;
WriteTrailingNop<2>(cmdbuf + 7);
return ORBIS_OK;
}
return -1;
} }
int PS4_SYSV_ABI sceGnmComputeWaitSemaphore() { int PS4_SYSV_ABI sceGnmComputeWaitSemaphore() {
@ -121,14 +156,37 @@ int PS4_SYSV_ABI sceGnmDisableMipStatsReport() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmDispatchDirect() { s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y,
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); u32 threads_z, u32 flags) {
return ORBIS_OK; LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 9) && ((s32)(threads_x | threads_y | threads_z) > -1)) {
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
cmdbuf = WriteHeader<PM4ItOpcode::DispatchDirect>(cmdbuf, 4, PM4ShaderType::ShaderCompute,
predicate);
cmdbuf = WriteBody(cmdbuf, threads_x, threads_y, threads_z);
cmdbuf[0] = (flags & 0x18) + 1; // ordered append mode
WriteTrailingNop<3>(cmdbuf + 1);
return ORBIS_OK;
}
return -1;
} }
int PS4_SYSV_ABI sceGnmDispatchIndirect() { s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_OK;
if (cmdbuf && (size == 7)) {
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
cmdbuf = WriteHeader<PM4ItOpcode::DispatchIndirect>(cmdbuf, 2, PM4ShaderType::ShaderCompute,
predicate);
cmdbuf[0] = data_offset;
cmdbuf[1] = (flags & 0x18) + 1; // ordered append mode
WriteTrailingNop<3>(cmdbuf + 2);
return ORBIS_OK;
}
return -1;
} }
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() { int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() {
@ -136,47 +194,60 @@ int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() {
return ORBIS_OK; return ORBIS_OK;
} }
// Fills the command buffer with the default compute hardware state block.
// Returns 0x100 (a size in DWs, not an error code) when `size` is larger than 0xff,
// and 0 otherwise. With `g_fair_hw_init` disabled only a single NOP header is written.
int PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState() { u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_OK;
if (size > 0xff) {
if constexpr (g_fair_hw_init) {
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x216u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x217u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
// NOTE(review): the header announces 6 body DWs but only 5 are written below, so the
// following NOP header lands inside this packet's declared extent - confirm intent.
cmdbuf = WriteHeader<PM4ItOpcode::Unknown58>(
cmdbuf, 6); // for some reason the packet indicates larger size
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
} else {
// NOTE(review): WriteHeader stores `size - 1` in the count field, so this NOP declares
// 0x100 body DWs (0x101 DWs with the header) while 0x100 is returned below - looks one
// DW too long, confirm. The doubled `cmdbuf =` assignment is redundant.
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
}
return 0x100; // it is a size, not a retcode
}
return 0;
} }
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u64 size, u32 index_count, uintptr_t index_addr, s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
u32 flags, u32 type) { u32 flags, u32 type) {
LOG_INFO(Lib_GnmDriver, LOG_TRACE(Lib_GnmDriver, "called");
"(STUBBED) called cmd_buffer = 0x{:x} size = {} index_count = {} index_addr = 0x{:x} "
"flags = 0x{:x} type = {}",
reinterpret_cast<uint64_t>(cmdbuf), size, index_count, index_addr, flags, type);
if (cmdbuf && (size == 10) && (index_addr != 0) && (index_addr & 1) == 0 && if (cmdbuf && (size == 10) && (index_addr != 0) && (index_addr & 1) == 0 &&
(flags & 0x1ffffffe) == 0) { (flags & 0x1ffffffe) == 0) { // no predication will be set in the packet
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(cmdbuf); auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(cmdbuf);
draw_index->header = draw_index->header =
PM4Type3Header{PM4ItOpcode::DrawIndex2, 4, PM4ShaderType::ShaderGraphics, predicate}; PM4Type3Header{PM4ItOpcode::DrawIndex2, 4, PM4ShaderType::ShaderGraphics};
draw_index->maxSize = index_count; draw_index->max_size = index_count;
draw_index->indexBaseLo = u32(index_addr); draw_index->index_base_lo = u32(index_addr);
draw_index->indexBaseHi = u32(index_addr >> 32); draw_index->index_base_hi = u32(index_addr >> 32);
draw_index->indexCount = index_count; draw_index->index_count = index_count;
draw_index->drawInitiator = 0; draw_index->draw_initiator = 0;
cmdbuf[6] = 0xc0021000; WriteTrailingNop<3>(cmdbuf + 6);
cmdbuf[7] = 0;
return ORBIS_OK; return ORBIS_OK;
} }
return -1; return -1;
} }
int PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags) { s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags) {
LOG_INFO(Lib_GnmDriver, "called"); LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 7) && (flags & 0x1ffffffe) == 0) { if (cmdbuf && (size == 7) &&
*cmdbuf = flags & 1 | 0xc0012d00; (flags & 0x1ffffffe) == 0) { // no predication will be set in the packet
cmdbuf[1] = index_count; cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(cmdbuf, PM4ShaderType::ShaderGraphics,
cmdbuf[2] = 2; index_count, 2u);
cmdbuf[3] = 0xc0021000; WriteTrailingNop<3>(cmdbuf);
cmdbuf[4] = 0;
return ORBIS_OK; return ORBIS_OK;
} }
return -1; return -1;
@ -202,9 +273,20 @@ int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmDrawIndexOffset() { s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count,
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); u32 flags) {
return ORBIS_OK; LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 9)) {
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexOffset2>(
cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate);
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count, 0u);
WriteTrailingNop<3>(cmdbuf);
return ORBIS_OK;
}
return -1;
} }
int PS4_SYSV_ABI sceGnmDrawIndirect() { int PS4_SYSV_ABI sceGnmDrawIndirect() {
@ -237,9 +319,17 @@ int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200() {
return ORBIS_OK; return ORBIS_OK;
} }
// Fills the command buffer with the default draw hardware state block.
// Returns 0x100 (a size in DWs, not an error code) when `size` is larger than 0xff,
// and 0 otherwise. The precise (`g_fair_hw_init`) path is not implemented and asserts.
int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmd, u64 size) { u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
LOG_INFO(Lib_GnmDriver, "(STUBBED) called cmd_buffer = 0x{:x} size = {}", LOG_TRACE(Lib_GnmDriver, "called");
reinterpret_cast<uint64_t>(cmd), size);
if (size > 0xff) {
if constexpr (g_fair_hw_init) {
ASSERT_MSG(0, "Not implemented");
} else {
// NOTE(review): WriteHeader stores `size - 1` in the count field, so this NOP declares
// 0x100 body DWs (0x101 DWs with the header) while 0x100 is returned below - looks one
// DW too long, confirm. The doubled `cmdbuf =` assignment is redundant.
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
}
return 0x100; // it is a size, not a retcode
}
return 0; return 0;
} }
@ -462,9 +552,16 @@ int PS4_SYSV_ABI sceGnmInsertDingDongMarker() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmInsertPopMarker() { s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_OK;
if (cmdbuf && (size == 6)) {
cmdbuf = WritePacket<PM4ItOpcode::Nop>(
cmdbuf, PM4ShaderType::ShaderGraphics,
static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPop), 0u, 0u, 0u, 0u);
return ORBIS_OK;
}
return -1;
} }
int PS4_SYSV_ABI sceGnmInsertPushColorMarker() { int PS4_SYSV_ABI sceGnmInsertPushColorMarker() {
@ -472,9 +569,25 @@ int PS4_SYSV_ABI sceGnmInsertPushColorMarker() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmInsertPushMarker() { s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marker) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_OK;
if (cmdbuf && marker) {
const auto len = std::strlen(marker);
const u32 packet_size = ((len + 8) >> 2) + ((len + 0xc) >> 3);
if (packet_size + 2 == size) {
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
nop->header =
PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics};
nop->data_block[0] = static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPush);
const auto marker_len = len + 1;
std::memcpy(&nop->data_block[1], marker, marker_len);
std::memset(reinterpret_cast<u8*>(&nop->data_block[1]) + marker_len, 0,
packet_size * 4 - marker_len);
return ORBIS_OK;
}
}
return -1;
} }
int PS4_SYSV_ABI sceGnmInsertSetColorMarker() { int PS4_SYSV_ABI sceGnmInsertSetColorMarker() {
@ -687,38 +800,36 @@ int PS4_SYSV_ABI sceGnmSetPsShader() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdBuffer, u32 numDwords, const u32* psRegs) { int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdbuf, u32 size, const u32* ps_regs) {
if (!cmdBuffer || numDwords <= 0x27) { if (!cmdbuf || size <= 0x27) {
return -1; return -1;
} }
if (!psRegs) { if (!ps_regs) {
cmdBuffer = PM4CmdSetData::SetShReg(cmdBuffer, 8u, 0u, cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, 0u,
0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS
cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x203u, 0u); // DB_SHADER_CONTROL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x203u, 0u); // DB_SHADER_CONTROL
cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x8fu, 0xfu); // CB_SHADER_MASK cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, 0xfu); // CB_SHADER_MASK
*(u64*)cmdBuffer = 0xc01c1000; *(u64*)cmdbuf = 0xc01c1000;
} else { } else {
if (psRegs[1] != 0) { if (ps_regs[1] != 0) {
LOG_ERROR(Lib_GnmDriver, "Invalid shader address."); LOG_ERROR(Lib_GnmDriver, "Invalid shader address.");
return -1; return -1;
} }
u32* start = cmdBuffer; cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0],
cmdBuffer = PM4CmdSetData::SetShReg(cmdBuffer, 8u, psRegs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS
0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS cmdbuf = PM4CmdSetData::SetShReg(
cmdBuffer = PM4CmdSetData::SetShReg( cmdbuf, 10u, ps_regs[2],
cmdBuffer, 10u, psRegs[2], ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5
psRegs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 cmdbuf = PM4CmdSetData::SetContextReg(
cmdBuffer = PM4CmdSetData::SetContextReg( cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT
cmdBuffer, 0x1c4u, psRegs[4], psRegs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6],
cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b3u, psRegs[6], ps_regs[7]); // SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR
psRegs[7]); // SPI_PS_INPUT_ENA cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b6u, ps_regs[8]); // SPI_PS_IN_CONTROL
cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b6u, psRegs[8]); // SPI_PS_IN_CONTROL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b8u, ps_regs[9]); // SPI_BARYC_CNTL
cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b8u, psRegs[9]); // SPI_BARYC_CNTL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x203u, ps_regs[10]); // DB_SHADER_CONTROL
cmdBuffer = cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, ps_regs[11]); // CB_SHADER_MASK
PM4CmdSetData::SetContextReg(cmdBuffer, 0x203u, psRegs[10]); // DB_SHADER_CONTROL *(u64*)cmdbuf = 0xc00a1000;
cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x8fu, psRegs[11]); // CB_SHADER_MASK
*(u64*)cmdBuffer = 0xc00a1000;
} }
return ORBIS_OK; return ORBIS_OK;
} }
@ -753,14 +864,7 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u64 size, const u32* vs_regs, u32 shader_modifier) { s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) {
LOG_INFO(Lib_GnmDriver,
"(STUBBED) called cmd_buffer = 0x{:x} size = {} shader_modifier = {} vs_reg0 = "
"0x{:x} vs_reg1 = 0x{:x} vs_reg2 = 0x{:x} vs_reg3 = 0x{:x} vs_reg4 = 0x{:x} vs_reg5 = "
"0x{:x} vs_reg6 = 0x{:x}",
reinterpret_cast<uint64_t>(cmdbuf), size, shader_modifier, vs_regs[0], vs_regs[1],
vs_regs[2], vs_regs[3], vs_regs[4], vs_regs[5], vs_regs[6]);
if (!cmdbuf || size <= 0x1c) { if (!cmdbuf || size <= 0x1c) {
return -1; return -1;
} }
@ -786,7 +890,8 @@ int PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u64 size, const u32* vs_regs, u3
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT
*(u64*)cmdbuf = 0xc00a1000;
WriteTrailingNop<11>(cmdbuf);
return ORBIS_OK; return ORBIS_OK;
} }
@ -1073,18 +1178,85 @@ int PS4_SYSV_ABI sceGnmUpdatePsShader() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmUpdatePsShader350() { s32 PS4_SYSV_ABI sceGnmUpdatePsShader350(u32* cmdbuf, u32 size, const u32* ps_regs) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
if (!cmdbuf || size <= 0x27) {
return -1;
}
if (!ps_regs) {
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, 0u,
0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0203u,
0u); // DB_SHADER_CONTROL update
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, 0xfu); // CB_SHADER_MASK
WriteTrailingNop<0x1d>(cmdbuf);
} else {
if (ps_regs[1] != 0) {
LOG_ERROR(Lib_GnmDriver, "Invalid shader address.");
return -1;
}
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0],
0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS
cmdbuf = PM4CmdSetData::SetShReg(
cmdbuf, 10u, ps_regs[2],
ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5
cmdbuf = WritePacket<PM4ItOpcode::Nop>(
cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c4u, ps_regs[4],
ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT update
cmdbuf = WritePacket<PM4ItOpcode::Nop>(
cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b3u, ps_regs[6],
ps_regs[7]); // SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR update
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b6u,
ps_regs[8]); // SPI_PS_IN_CONTROL update
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b8u,
ps_regs[9]); // SPI_BARYC_CNTL update
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0203u,
ps_regs[10]); // DB_SHADER_CONTROL update
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e008fu,
ps_regs[11]); // CB_SHADER_MASK update
WriteTrailingNop<11>(cmdbuf);
}
return ORBIS_OK; return ORBIS_OK;
} }
// Writes the VS shader update sequence into the command buffer: two SetShReg packets,
// three NOP packets each carrying a constant word and one `vs_regs` entry, and a trailing
// NOP. Returns ORBIS_OK on success, or -1 when an argument check fails.
int PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmd, u64 size, const u32* vs_regs, u32 shader_modifier) { s32 PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmdbuf, u32 size, const u32* vs_regs,
LOG_INFO(Lib_GnmDriver, u32 shader_modifier) {
"(STUBBED) called cmd_buffer = 0x{:x} size = {} shader_modifier = {} vs_reg0 = " LOG_TRACE(Lib_GnmDriver, "called");
"0x{:x} vs_reg1 = 0x{:x} vs_reg2 = 0x{:x} vs_reg3 = 0x{:x} vs_reg4 = 0x{:x} vs_reg5 = "
"0x{:x} vs_reg6 = 0x{:x}", if (!cmdbuf || size <= 0x1c) {
reinterpret_cast<uint64_t>(cmd), size, shader_modifier, vs_regs[0], vs_regs[1], return -1;
vs_regs[2], vs_regs[3], vs_regs[4], vs_regs[5], vs_regs[6]); }
if (!vs_regs) {
LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument");
return -1;
}
// The modifier may only use the bits that are masked out of vs_regs[2] below.
if (shader_modifier & 0xfcfffc3f) {
LOG_ERROR(Lib_GnmDriver, "Invalid modifier mask");
return -1;
}
if (vs_regs[1] != 0) {
LOG_ERROR(Lib_GnmDriver, "Invalid shader address");
return -1;
}
// With a non-zero modifier, keep the 0xfcfffc3f bits of vs_regs[2] and merge the modifier in.
const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f | shader_modifier);
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0207u,
vs_regs[6]); // PA_CL_VS_OUT_CNTL update
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b1u,
vs_regs[4]); // SPI_VS_OUT_CONFIG update
cmdbuf = WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c3u,
vs_regs[5]); // SPI_SHADER_POS_FORMAT update
WriteTrailingNop<11>(cmdbuf);
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -14,7 +14,8 @@ namespace Libraries::GnmDriver {
int PS4_SYSV_ABI sceGnmAddEqEvent(); int PS4_SYSV_ABI sceGnmAddEqEvent();
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed(); int PS4_SYSV_ABI sceGnmAreSubmitsAllowed();
int PS4_SYSV_ABI sceGnmBeginWorkload(); int PS4_SYSV_ABI sceGnmBeginWorkload();
int PS4_SYSV_ABI sceGnmComputeWaitOnAddress(); s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask,
u32 cmp_func, u32 ref);
int PS4_SYSV_ABI sceGnmComputeWaitSemaphore(); int PS4_SYSV_ABI sceGnmComputeWaitSemaphore();
int PS4_SYSV_ABI sceGnmCreateWorkloadStream(); int PS4_SYSV_ABI sceGnmCreateWorkloadStream();
int PS4_SYSV_ABI sceGnmDebuggerGetAddressWatch(); int PS4_SYSV_ABI sceGnmDebuggerGetAddressWatch();
@ -32,25 +33,27 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream();
int PS4_SYSV_ABI sceGnmDingDong(); int PS4_SYSV_ABI sceGnmDingDong();
int PS4_SYSV_ABI sceGnmDingDongForWorkload(); int PS4_SYSV_ABI sceGnmDingDongForWorkload();
int PS4_SYSV_ABI sceGnmDisableMipStatsReport(); int PS4_SYSV_ABI sceGnmDisableMipStatsReport();
int PS4_SYSV_ABI sceGnmDispatchDirect(); s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y,
int PS4_SYSV_ABI sceGnmDispatchIndirect(); u32 threads_z, u32 flags);
s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags);
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(); int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec();
int PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(); u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size);
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmd, u64 size, u32 index_count, uintptr_t index_addr, s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
u32 flags, u32 type); u32 flags, u32 type);
int PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags); s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags);
int PS4_SYSV_ABI sceGnmDrawIndexIndirect(); int PS4_SYSV_ABI sceGnmDrawIndexIndirect();
int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(); int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti();
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(); int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti();
int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced(); int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced();
int PS4_SYSV_ABI sceGnmDrawIndexOffset(); s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count,
u32 flags);
int PS4_SYSV_ABI sceGnmDrawIndirect(); int PS4_SYSV_ABI sceGnmDrawIndirect();
int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti(); int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti();
int PS4_SYSV_ABI sceGnmDrawIndirectMulti(); int PS4_SYSV_ABI sceGnmDrawIndirectMulti();
int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(); int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState();
int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(); int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175();
int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(); int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200();
int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmd, u64 size); u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(); int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState();
int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(); int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400();
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto(); int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
@ -95,9 +98,9 @@ int PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress();
int PS4_SYSV_ABI sceGnmGpuPaDebugEnter(); int PS4_SYSV_ABI sceGnmGpuPaDebugEnter();
int PS4_SYSV_ABI sceGnmGpuPaDebugLeave(); int PS4_SYSV_ABI sceGnmGpuPaDebugLeave();
int PS4_SYSV_ABI sceGnmInsertDingDongMarker(); int PS4_SYSV_ABI sceGnmInsertDingDongMarker();
int PS4_SYSV_ABI sceGnmInsertPopMarker(); s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size);
int PS4_SYSV_ABI sceGnmInsertPushColorMarker(); int PS4_SYSV_ABI sceGnmInsertPushColorMarker();
int PS4_SYSV_ABI sceGnmInsertPushMarker(); s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marker);
int PS4_SYSV_ABI sceGnmInsertSetColorMarker(); int PS4_SYSV_ABI sceGnmInsertSetColorMarker();
int PS4_SYSV_ABI sceGnmInsertSetMarker(); int PS4_SYSV_ABI sceGnmInsertSetMarker();
int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker(); int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker();
@ -140,14 +143,14 @@ int PS4_SYSV_ABI sceGnmSetGsShader();
int PS4_SYSV_ABI sceGnmSetHsShader(); int PS4_SYSV_ABI sceGnmSetHsShader();
int PS4_SYSV_ABI sceGnmSetLsShader(); int PS4_SYSV_ABI sceGnmSetLsShader();
int PS4_SYSV_ABI sceGnmSetPsShader(); int PS4_SYSV_ABI sceGnmSetPsShader();
int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdBuffer, u32 numDwords, const u32* psRegs); int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdbuf, u32 size, const u32* ps_regs);
int PS4_SYSV_ABI sceGnmSetResourceRegistrationUserMemory(); int PS4_SYSV_ABI sceGnmSetResourceRegistrationUserMemory();
int PS4_SYSV_ABI sceGnmSetResourceUserData(); int PS4_SYSV_ABI sceGnmSetResourceUserData();
int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters();
int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance();
int PS4_SYSV_ABI sceGnmSetupMipStatsReport(); int PS4_SYSV_ABI sceGnmSetupMipStatsReport();
int PS4_SYSV_ABI sceGnmSetVgtControl(); int PS4_SYSV_ABI sceGnmSetVgtControl();
int PS4_SYSV_ABI sceGnmSetVsShader(u32* cmd, u64 size, const u32* vs_regs, u32 shader_modifier); s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier);
int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier(); int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier();
int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers(); int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers();
int PS4_SYSV_ABI sceGnmSpmEndSpm(); int PS4_SYSV_ABI sceGnmSpmEndSpm();
@ -189,8 +192,9 @@ int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2();
int PS4_SYSV_ABI sceGnmSqttWaitForEvent(); int PS4_SYSV_ABI sceGnmSqttWaitForEvent();
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(); int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers();
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(); int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload();
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes, int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
void* ccbGpuAddrs[], u32* ccbSizesInBytes); u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes);
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(); int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload();
int PS4_SYSV_ABI sceGnmSubmitDone(); int PS4_SYSV_ABI sceGnmSubmitDone();
int PS4_SYSV_ABI sceGnmUnmapComputeQueue(); int PS4_SYSV_ABI sceGnmUnmapComputeQueue();
@ -200,8 +204,9 @@ int PS4_SYSV_ABI sceGnmUnregisterResource();
int PS4_SYSV_ABI sceGnmUpdateGsShader(); int PS4_SYSV_ABI sceGnmUpdateGsShader();
int PS4_SYSV_ABI sceGnmUpdateHsShader(); int PS4_SYSV_ABI sceGnmUpdateHsShader();
int PS4_SYSV_ABI sceGnmUpdatePsShader(); int PS4_SYSV_ABI sceGnmUpdatePsShader();
int PS4_SYSV_ABI sceGnmUpdatePsShader350(); s32 PS4_SYSV_ABI sceGnmUpdatePsShader350(u32* cmdbuf, u32 size, const u32* ps_regs);
int PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmd, u64 size, const u32* vs_regs, u32 shader_modifier); s32 PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmdbuf, u32 size, const u32* vs_regs,
u32 shader_modifier);
int PS4_SYSV_ABI sceGnmValidateCommandBuffers(); int PS4_SYSV_ABI sceGnmValidateCommandBuffers();
int PS4_SYSV_ABI sceGnmValidateDisableDiagnostics(); int PS4_SYSV_ABI sceGnmValidateDisableDiagnostics();
int PS4_SYSV_ABI sceGnmValidateDisableDiagnostics2(); int PS4_SYSV_ABI sceGnmValidateDisableDiagnostics2();

View File

@ -26,20 +26,20 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
break; break;
case PM4ItOpcode::SetContextReg: { case PM4ItOpcode::SetContextReg: {
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header); auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->regOffset], header + 2, std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset],
(count - 1) * sizeof(u32)); header + 2, (count - 1) * sizeof(u32));
break; break;
} }
case PM4ItOpcode::SetShReg: { case PM4ItOpcode::SetShReg: {
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header); auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->regOffset], header + 2, std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32)); (count - 1) * sizeof(u32));
break; break;
} }
case PM4ItOpcode::SetUconfigReg: { case PM4ItOpcode::SetUconfigReg: {
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header); auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->regOffset], header + 2, std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset],
(count - 1) * sizeof(u32)); header + 2, (count - 1) * sizeof(u32));
break; break;
} }
case PM4ItOpcode::IndexType: { case PM4ItOpcode::IndexType: {
@ -49,11 +49,11 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
} }
case PM4ItOpcode::DrawIndex2: { case PM4ItOpcode::DrawIndex2: {
auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header); auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
regs.max_index_size = draw_index->maxSize; regs.max_index_size = draw_index->max_size;
regs.index_base_address.base_addr_lo = draw_index->indexBaseLo; regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
regs.index_base_address.base_addr_hi.Assign(draw_index->indexBaseHi); regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
regs.num_indices = draw_index->indexCount; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->drawInitiator; regs.draw_initiator = draw_index->draw_initiator;
// rasterizer->DrawIndex(); // rasterizer->DrawIndex();
break; break;
} }
@ -66,8 +66,8 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
} }
case PM4ItOpcode::EventWriteEop: { case PM4ItOpcode::EventWriteEop: {
auto* event_write = reinterpret_cast<PM4CmdEventWriteEop*>(header); auto* event_write = reinterpret_cast<PM4CmdEventWriteEop*>(header);
const InterruptSelect irq_sel = event_write->intSel; const InterruptSelect irq_sel = event_write->int_sel;
const DataSelect data_sel = event_write->dataSel; const DataSelect data_sel = event_write->data_sel;
ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64); ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64);
*event_write->Address() = event_write->DataQWord(); *event_write->Address() = event_write->DataQWord();
break; break;

View File

@ -39,7 +39,7 @@ union PM4Type3Header {
PM4Predicate pred = PM4Predicate::PredDisable) { PM4Predicate pred = PM4Predicate::PredDisable) {
raw = 0; raw = 0;
predicate.Assign(pred); predicate.Assign(pred);
shaderType.Assign(stype); shader_type.Assign(stype);
opcode.Assign(code); opcode.Assign(code);
count.Assign(num_words_min_one); count.Assign(num_words_min_one);
type.Assign(3); type.Assign(3);
@ -50,10 +50,10 @@ union PM4Type3Header {
} }
u32 raw; u32 raw;
BitField<0, 1, PM4Predicate> predicate; ///< Predicated version of packet when set BitField<0, 1, PM4Predicate> predicate; ///< Predicated version of packet when set
BitField<1, 1, PM4ShaderType> shaderType; ///< 0: Graphics, 1: Compute Shader BitField<1, 1, PM4ShaderType> shader_type; ///< 0: Graphics, 1: Compute Shader
BitField<8, 8, PM4ItOpcode> opcode; ///< IT opcode BitField<8, 8, PM4ItOpcode> opcode; ///< IT opcode
BitField<16, 14, u32> count; ///< Number of DWORDs - 1 in the information body. BitField<16, 14, u32> count; ///< Number of DWORDs - 1 in the information body.
BitField<30, 2, u32> type; ///< Packet identifier. It should be 3 for type 3 packets BitField<30, 2, u32> type; ///< Packet identifier. It should be 3 for type 3 packets
}; };
@ -64,42 +64,55 @@ union PM4Header {
BitField<30, 2, u32> type; BitField<30, 2, u32> type;
}; };
// Helpers that serialize PM4 type-3 packets into a command buffer.
// WriteHeader stores a PM4Type3Header built from (opcode, size - 1, type, predicate) at
// cmdbuf and returns the pointer advanced by one dword.
// WriteBody copies its arguments, converted to u32, to cmdbuf and returns the pointer
// advanced past them.
template <PM4ItOpcode opcode, typename... Args> // Write the PM4 header
constexpr u32* Write(u32* cmdbuf, PM4ShaderType type, Args... data) { template <PM4ItOpcode opcode>
// Write the PM4 header. constexpr u32* WriteHeader(u32* cmdbuf, u32 size,
PM4Type3Header header{opcode, sizeof...(Args) - 1, type}; PM4ShaderType type = PM4ShaderType::ShaderGraphics,
PM4Predicate predicate = PM4Predicate::PredDisable) {
// The header constructor receives the body size minus one.
PM4Type3Header header{opcode, size - 1, type, predicate};
std::memcpy(cmdbuf, &header, sizeof(header)); std::memcpy(cmdbuf, &header, sizeof(header));
return ++cmdbuf;
}
// Write arguments // Write arguments
template <typename... Args>
constexpr u32* WriteBody(u32* cmdbuf, Args... data) {
const std::array<u32, sizeof...(Args)> args{data...}; const std::array<u32, sizeof...(Args)> args{data...};
std::memcpy(++cmdbuf, args.data(), sizeof(args)); std::memcpy(cmdbuf, args.data(), sizeof(args));
cmdbuf += args.size(); cmdbuf += args.size();
return cmdbuf; return cmdbuf;
} }
// Emit one complete PM4 type-3 packet: a header for `opcode` sized to the number of
// arguments, immediately followed by the arguments themselves.
// Returns the command buffer pointer positioned just past the written packet.
template <PM4ItOpcode opcode, typename... Args>
constexpr u32* WritePacket(u32* cmdbuf, PM4ShaderType type, Args... data) {
    // The body length is the argument count; WriteHeader turns it into the header count.
    constexpr u32 body_dwords = sizeof...(Args);
    u32* const body = WriteHeader<opcode>(cmdbuf, body_dwords, type);
    return WriteBody(body, data...);
}
// One dword of per-register-class enable bits. PM4CmdContextControl uses this type for
// both its load and its shadow controls. `raw` aliases the same 32 bits as the flags.
union ContextControlEnable { union ContextControlEnable {
u32 raw; u32 raw;
BitField<0, 1, u32> enableSingleCntxConfigReg; ///< single context config reg BitField<0, 1, u32> enable_single_cntx_config_reg; ///< single context config reg
BitField<1, 1, u32> enableMultiCntxRenderReg; ///< multi context render state reg BitField<1, 1, u32> enable_multi_cntx_render_reg; ///< multi context render state reg
BitField<15, 1, u32> enableUserConfigReg__CI; ///< User Config Reg on CI(reserved for SI) BitField<15, 1, u32> enable_user_config_reg__CI; ///< User Config Reg on CI(reserved for SI)
BitField<16, 1, u32> enableGfxSHReg; ///< Gfx SH Registers BitField<16, 1, u32> enable_gfx_sh_reg; ///< Gfx SH Registers
BitField<24, 1, u32> enableCSSHReg; ///< CS SH Registers BitField<24, 1, u32> enable_cs_sh_reg; ///< CS SH Registers
BitField<31, 1, u32> enableDw; ///< DW enable BitField<31, 1, u32> enable_dw; ///< DW enable
}; };
// Packet layout: the type-3 header followed by two ContextControlEnable dwords,
// one governing loading and one governing shadowing.
struct PM4CmdContextControl { struct PM4CmdContextControl {
PM4Type3Header header; PM4Type3Header header;
ContextControlEnable loadControl; ///< Enable bits for loading ContextControlEnable load_control; ///< Enable bits for loading
ContextControlEnable shadowEnable; ///< Enable bits for shadowing ContextControlEnable shadow_enable; ///< Enable bits for shadowing
}; };
// High address dword used by PM4CmdLoadData: a 16-bit address field starting at bit 0
// and a single wait-for-idle flag at bit 31. `raw` aliases the whole dword.
union LoadAddressHigh { union LoadAddressHigh {
u32 raw; u32 raw;
BitField<0, 16, u32> BitField<0, 16, u32>
addrHi; ///< bits for the block in Memory from where the CP will fetch the state addr_hi; ///< bits for the block in Memory from where the CP will fetch the state
BitField<31, 1, u32> BitField<31, 1, u32>
waitIdle; ///< if set the CP will wait for the graphics pipe to be idle by writing wait_idle; ///< if set the CP will wait for the graphics pipe to be idle by writing
///< to the GRBM Wait Until register with "Wait for 3D idle" ///< to the GRBM Wait Until register with "Wait for 3D idle"
}; };
/** /**
@ -110,12 +123,12 @@ union LoadAddressHigh {
*/ */
struct PM4CmdLoadData { struct PM4CmdLoadData {
PM4Type3Header header; PM4Type3Header header;
u32 addrLo; ///< low 32 address bits for the block in memory from where the CP will fetch the u32 addr_lo; ///< low 32 address bits for the block in memory from where the CP will fetch the
///< state ///< state
// High address bits share this dword with the wait-for-idle flag (see LoadAddressHigh).
LoadAddressHigh addrHi; LoadAddressHigh addr_hi;
u32 regOffset; ///< offset in DWords from the register base address u32 reg_offset; ///< offset in DWords from the register base address
u32 numDwords; ///< number of DWords that the CP will fetch and write into the chip. A value of u32 num_dwords; ///< number of DWords that the CP will fetch and write into the chip. A value of
///< zero will fetch nothing ///< zero will fetch nothing
}; };
enum class LoadDataIndex : u32 { enum class LoadDataIndex : u32 {
@ -131,8 +144,8 @@ enum class LoadDataFormat : u32 {
// Low address dword used by PM4CmdLoadDataIndex: a one-bit LoadDataIndex selector at
// bit 0 and a 30-bit address field starting at bit 2. `raw` aliases the whole dword.
union LoadAddressLow { union LoadAddressLow {
u32 raw; u32 raw;
BitField<0, 1, LoadDataIndex> index; BitField<0, 1, LoadDataIndex> index;
BitField<2, 30, u32> addrLo; ///< bits for the block in Memory from where the CP will fetch the BitField<2, 30, u32> addr_lo; ///< bits for the block in Memory from where the CP will fetch the
///< state. DWORD aligned ///< state. DWORD aligned
}; };
/** /**
@ -142,16 +155,16 @@ union LoadAddressLow {
*/ */
struct PM4CmdLoadDataIndex { struct PM4CmdLoadDataIndex {
PM4Type3Header header; PM4Type3Header header;
LoadAddressLow addrLo; ///< low 32 address bits for the block in memory from where the CP will LoadAddressLow addr_lo; ///< low 32 address bits for the block in memory from where the CP will
///< fetch the state ///< fetch the state
u32 addrOffset; ///< addrLo.index = 1 Indexed mode u32 addr_offset; ///< addrLo.index = 1 Indexed mode
// The register offset (16 bits at bit 0) and the data format flag (bit 31) share one
// dword; `raw` exposes that dword as a whole.
union { union {
BitField<0, 16, u32> regOffset; ///< offset in DWords from the register base address BitField<0, 16, u32> reg_offset; ///< offset in DWords from the register base address
BitField<31, 1, LoadDataFormat> dataFormat; BitField<31, 1, LoadDataFormat> data_format;
u32 raw; u32 raw;
}; };
u32 numDwords; ///< Number of DWords that the CP will fetch and write u32 num_dwords; ///< Number of DWords that the CP will fetch and write
///< into the chip. A value of zero will fetch nothing ///< into the chip. A value of zero will fetch nothing
}; };
/** /**
@ -168,52 +181,62 @@ struct PM4CmdSetData {
PM4Type3Header header; PM4Type3Header header;
union { union {
u32 raw; u32 raw;
BitField<0, 16, u32> regOffset; ///< Offset in DWords from the register base address BitField<0, 16, u32> reg_offset; ///< Offset in DWords from the register base address
BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+ BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+
///< Program to zero for other opcodes and on SI ///< Program to zero for other opcodes and on SI
}; };
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args> template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) { static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) {
return Write<PM4ItOpcode::SetContextReg>(cmdbuf, type, data...); return WritePacket<PM4ItOpcode::SetContextReg>(cmdbuf, type, data...);
} }
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args> template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
static constexpr u32* SetShReg(u32* cmdbuf, Args... data) { static constexpr u32* SetShReg(u32* cmdbuf, Args... data) {
return Write<PM4ItOpcode::SetShReg>(cmdbuf, type, data...); return WritePacket<PM4ItOpcode::SetShReg>(cmdbuf, type, data...);
} }
}; };
// NOP packet: the type-3 header followed by a variable-size payload.
struct PM4CmdNop { struct PM4CmdNop {
PM4Type3Header header; PM4Type3Header header;
// Zero-length array marking where the payload dwords begin right after the header.
// NOTE(review): zero-length arrays are a compiler extension, not standard C++.
u32 data_block[0];
// Known marker values, described by the per-entry comments below.
// NOTE(review): presumably stored in the first payload dword — confirm with the consumer.
enum class PayloadType : u32 {
DebugMarkerPush = 0x68750001, ///< Begin of GPU event scope
DebugMarkerPop = 0x68750002, ///< End of GPU event scope
SetVsharpInUdata = 0x68750004, ///< Indicates that V# will be set in the next packet
SetTsharpInUdata = 0x68750005, ///< Indicates that T# will be set in the next packet
SetSsharpInUdata = 0x68750006, ///< Indicates that S# will be set in the next packet
DebugColorMarkerPush = 0x6875000e, ///< Begin of GPU event scope with color
};
}; };
// Indexed draw packet that addresses the index buffer by a starting index offset:
// the header followed by four dwords (max size, offset, count, draw initiator).
struct PM4CmdDrawIndexOffset2 { struct PM4CmdDrawIndexOffset2 {
PM4Type3Header header; PM4Type3Header header;
u32 maxSize; ///< Maximum number of indices u32 max_size; ///< Maximum number of indices
u32 indexOffset; ///< Zero based starting index number in the index buffer u32 index_offset; ///< Zero based starting index number in the index buffer
u32 indexCount; ///< number of indices in the Index Buffer u32 index_count; ///< number of indices in the Index Buffer
u32 drawInitiator; ///< draw Initiator Register u32 draw_initiator; ///< draw Initiator Register
}; };
// Indexed draw packet that carries the index buffer base address directly:
// the header followed by five dwords (max size, base low/high, count, draw initiator).
struct PM4CmdDrawIndex2 { struct PM4CmdDrawIndex2 {
PM4Type3Header header; PM4Type3Header header;
u32 maxSize; ///< maximum number of indices u32 max_size; ///< maximum number of indices
u32 indexBaseLo; ///< base Address Lo [31:1] of Index Buffer u32 index_base_lo; ///< base Address Lo [31:1] of Index Buffer
///< (Word-Aligned). Written to the VGT_DMA_BASE register. ///< (Word-Aligned). Written to the VGT_DMA_BASE register.
u32 indexBaseHi; ///< base Address Hi [39:32] of Index Buffer. u32 index_base_hi; ///< base Address Hi [39:32] of Index Buffer.
///< Written to the VGT_DMA_BASE_HI register ///< Written to the VGT_DMA_BASE_HI register
u32 indexCount; ///< number of indices in the Index Buffer. u32 index_count; ///< number of indices in the Index Buffer.
///< Written to the VGT_NUM_INDICES register. ///< Written to the VGT_NUM_INDICES register.
u32 drawInitiator; ///< written to the VGT_DRAW_INITIATOR register u32 draw_initiator; ///< written to the VGT_DRAW_INITIATOR register
}; };
// Packet selecting the index format: the header followed by one dword that holds the
// index type (2 bits at bit 0) and the DMA swap mode (2 bits at bit 2).
struct PM4CmdDrawIndexType { struct PM4CmdDrawIndexType {
PM4Type3Header header; PM4Type3Header header;
// `raw` aliases the same dword as the two bit fields.
union { union {
u32 raw; u32 raw;
BitField<0, 2, u32> indexType; ///< Select 16 Vs 32bit index BitField<0, 2, u32> index_type; ///< Select 16 Vs 32bit index
BitField<2, 2, u32> swapMode; ///< DMA swap mode BitField<2, 2, u32> swap_mode; ///< DMA swap mode
}; };
}; };
@ -241,25 +264,25 @@ struct PM4CmdEventWriteEop {
PM4Type3Header header; PM4Type3Header header;
union { union {
u32 event_control; u32 event_control;
BitField<0, 6, u32> eventType; ///< Event type written to VGT_EVENT_INITIATOR BitField<0, 6, u32> event_type; ///< Event type written to VGT_EVENT_INITIATOR
BitField<8, 4, u32> eventIndex; ///< Event index BitField<8, 4, u32> event_index; ///< Event index
}; };
u32 addressLo; u32 address_lo;
union { union {
u32 data_control; u32 data_control;
BitField<0, 16, u32> addressHi; ///< High bits of address BitField<0, 16, u32> address_hi; ///< High bits of address
BitField<24, 2, InterruptSelect> intSel; ///< Selects interrupt action for end-of-pipe BitField<24, 2, InterruptSelect> int_sel; ///< Selects interrupt action for end-of-pipe
BitField<29, 3, DataSelect> dataSel; ///< Selects source of data BitField<29, 3, DataSelect> data_sel; ///< Selects source of data
}; };
u32 dataLo; ///< Value that will be written to memory when event occurs u32 data_lo; ///< Value that will be written to memory when event occurs
u32 dataHi; ///< Value that will be written to memory when event occurs u32 data_hi; ///< Value that will be written to memory when event occurs
u64* Address() const { u64* Address() const {
return reinterpret_cast<u64*>(addressLo | u64(addressHi) << 32); return reinterpret_cast<u64*>(address_lo | u64(address_hi) << 32);
} }
u64 DataQWord() const { u64 DataQWord() const {
return dataLo | u64(dataHi) << 32; return data_lo | u64(data_hi) << 32;
} }
}; };
@ -287,4 +310,19 @@ struct PM4DmaData {
u32 command; u32 command;
}; };
// Packet layout: the type-3 header, one control dword and five plain dwords.
// NOTE(review): presumably the body of a PM4ItOpcode::WaitRegMem packet; the field
// meanings noted below are inferred from the names — confirm against the PM4 spec.
struct PM4CmdWaitRegMem {
PM4Type3Header header;
// Control dword; `raw` aliases the same 32 bits as the three bit fields.
union {
BitField<0, 3, u32> function; ///< 3 bits at bit 0; presumably the comparison function
BitField<4, 1, u32> mem_space; ///< 1 bit at bit 4; presumably register vs memory polling
BitField<8, 1, u32> engine; ///< 1 bit at bit 8; presumably the engine that waits
u32 raw;
};
u32 poll_addr_lo; ///< presumably the low dword of the polled address
u32 poll_addr_hi; ///< presumably the high dword of the polled address
u32 ref; ///< presumably the reference value compared against
u32 mask; ///< presumably the mask applied to the polled value before comparing
u32 poll_interval; ///< presumably the interval between polls — units not shown here
};
} // namespace AmdGpu } // namespace AmdGpu

View File

@ -36,6 +36,7 @@ enum class PM4ItOpcode : u32 {
WriteData = 0x37, WriteData = 0x37,
DrawIndexIndirectMulti = 0x38, DrawIndexIndirectMulti = 0x38,
MemSemaphore = 0x39, MemSemaphore = 0x39,
WaitRegMem = 0x3c,
IndirectBuffer = 0x3F, IndirectBuffer = 0x3F,
CondIndirectBuffer = 0x3F, CondIndirectBuffer = 0x3F,
CopyData = 0x40, CopyData = 0x40,
@ -48,6 +49,7 @@ enum class PM4ItOpcode : u32 {
PremableCntl = 0x4A, PremableCntl = 0x4A,
DmaData = 0x50, DmaData = 0x50,
ContextRegRmw = 0x51, ContextRegRmw = 0x51,
Unknown58 = 0x58,
LoadShReg = 0x5F, LoadShReg = 0x5F,
LoadConfigReg = 0x60, LoadConfigReg = 0x60,
LoadContextReg = 0x61, LoadContextReg = 0x61,