From 7e8d90d609ea173d0c80d254dea1001ea96d354f Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Wed, 1 May 2024 18:29:06 +0200 Subject: [PATCH] core: libraries: gnmdriver: basic functionality extension (#120) Also a bit of refactoring in `video_core` --- src/core/libraries/gnmdriver/gnmdriver.cpp | 354 +++++++++++++++------ src/core/libraries/gnmdriver/gnmdriver.h | 37 ++- src/video_core/amdgpu/liverpool.cpp | 24 +- src/video_core/amdgpu/pm4_cmds.h | 170 ++++++---- src/video_core/amdgpu/pm4_opcodes.h | 2 + 5 files changed, 402 insertions(+), 185 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 1cafc424..ceaf4655 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -16,6 +16,20 @@ using namespace AmdGpu; static std::unique_ptr liverpool; +// In case of precise gnm driver emulation we need to send a bunch of HW-specific +// initialization commands. This may slow down development at an early stage, as their +// support is not important and can be ignored for a while. 
+static constexpr bool g_fair_hw_init = false; + +// Write a special ending NOP packet with N DWs data block +template +static inline u32* WriteTrailingNop(u32* cmdbuf) { + auto* nop = reinterpret_cast(cmdbuf); + nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1}; + nop->data_block[0] = 0; // only one out of `data_block_size` is initialized + return cmdbuf + data_block_size + 1 /* header */; +} + int PS4_SYSV_ABI sceGnmAddEqEvent() { LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); return ORBIS_OK; @@ -31,9 +45,30 @@ int PS4_SYSV_ABI sceGnmBeginWorkload() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmComputeWaitOnAddress() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask, + u32 cmp_func, u32 ref) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 0xe)) { + cmdbuf = WriteHeader(cmdbuf, 3); + cmdbuf = WriteBody(cmdbuf, 0u); + cmdbuf += 2; + + const u32 is_mem = addr > 0xffffu; + const u32 addr_mask = is_mem ? 
0xfffffffcu : 0xffffu; + auto* wait_reg_mem = reinterpret_cast(cmdbuf); + wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; + wait_reg_mem->raw = (is_mem << 4u) | (cmp_func & 7u); + wait_reg_mem->poll_addr_lo = u32(addr & addr_mask); + wait_reg_mem->poll_addr_hi = u32(addr >> 32u); + wait_reg_mem->ref = ref; + wait_reg_mem->mask = mask; + wait_reg_mem->poll_interval = 10; + + WriteTrailingNop<2>(cmdbuf + 7); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmComputeWaitSemaphore() { @@ -121,14 +156,37 @@ int PS4_SYSV_ABI sceGnmDisableMipStatsReport() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmDispatchDirect() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y, + u32 threads_z, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 9) && ((s32)(threads_x | threads_y | threads_z) > -1)) { + const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader(cmdbuf, 4, PM4ShaderType::ShaderCompute, + predicate); + cmdbuf = WriteBody(cmdbuf, threads_x, threads_y, threads_z); + cmdbuf[0] = (flags & 0x18) + 1; // ordered append mode + + WriteTrailingNop<3>(cmdbuf + 1); + return ORBIS_OK; + } + return -1; } -int PS4_SYSV_ABI sceGnmDispatchIndirect() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 7)) { + const auto predicate = flags & 1 ? 
PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader(cmdbuf, 2, PM4ShaderType::ShaderCompute, + predicate); + cmdbuf[0] = data_offset; + cmdbuf[1] = (flags & 0x18) + 1; // ordered append mode + + WriteTrailingNop<3>(cmdbuf + 2); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() { @@ -136,47 +194,60 @@ int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (size > 0xff) { + if constexpr (g_fair_hw_init) { + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x216u, + 0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0 + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x217u, + 0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1 + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS + + cmdbuf = WriteHeader( + cmdbuf, 6); // for some reason the packet indicates larger size + cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u); + + cmdbuf = WriteHeader(cmdbuf, 0xef); + cmdbuf = WriteBody(cmdbuf, 0xau, 0u); + } else { + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + } + return 0x100; // it is a size, not a retcode + } + return 0; } -s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u64 size, u32 index_count, uintptr_t index_addr, +s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr, u32 flags, u32 type) { - LOG_INFO(Lib_GnmDriver, - "(STUBBED) called cmd_buffer = 0x{:x} size = {} index_count = {} index_addr = 0x{:x} " - "flags = 0x{:x} type = {}", - reinterpret_cast(cmdbuf), size, index_count, index_addr, flags, type); + LOG_TRACE(Lib_GnmDriver, "called"); if (cmdbuf && (size == 10) && (index_addr != 0) && (index_addr & 1) == 0 && - (flags & 0x1ffffffe) == 0) { - const auto predicate = flags & 1 ? 
PM4Predicate::PredEnable : PM4Predicate::PredDisable; + (flags & 0x1ffffffe) == 0) { // no predication will be set in the packet auto* draw_index = reinterpret_cast(cmdbuf); - draw_index->header = - PM4Type3Header{PM4ItOpcode::DrawIndex2, 4, PM4ShaderType::ShaderGraphics, predicate}; - draw_index->maxSize = index_count; - draw_index->indexBaseLo = u32(index_addr); - draw_index->indexBaseHi = u32(index_addr >> 32); - draw_index->indexCount = index_count; - draw_index->drawInitiator = 0; + PM4Type3Header{PM4ItOpcode::DrawIndex2, 4, PM4ShaderType::ShaderGraphics}; + draw_index->max_size = index_count; + draw_index->index_base_lo = u32(index_addr); + draw_index->index_base_hi = u32(index_addr >> 32); + draw_index->index_count = index_count; + draw_index->draw_initiator = 0; - cmdbuf[6] = 0xc0021000; - cmdbuf[7] = 0; + WriteTrailingNop<3>(cmdbuf + 6); return ORBIS_OK; } return -1; } -int PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags) { - LOG_INFO(Lib_GnmDriver, "called"); +s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); - if (cmdbuf && (size == 7) && (flags & 0x1ffffffe) == 0) { - *cmdbuf = flags & 1 | 0xc0012d00; - cmdbuf[1] = index_count; - cmdbuf[2] = 2; - cmdbuf[3] = 0xc0021000; - cmdbuf[4] = 0; + if (cmdbuf && (size == 7) && + (flags & 0x1ffffffe) == 0) { // no predication will be set in the packet + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, + index_count, 2u); + WriteTrailingNop<3>(cmdbuf); return ORBIS_OK; } return -1; @@ -202,9 +273,20 @@ int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmDrawIndexOffset() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count, + u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 9)) { + const auto predicate = flags & 
1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader( + cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate); + cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count, 0u); + + WriteTrailingNop<3>(cmdbuf); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDrawIndirect() { @@ -237,9 +319,17 @@ int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmd, u64 size) { - LOG_INFO(Lib_GnmDriver, "(STUBBED) called cmd_buffer = 0x{:x} size = {}", - reinterpret_cast(cmd), size); +u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (size > 0xff) { + if constexpr (g_fair_hw_init) { + ASSERT_MSG(0, "Not implemented"); + } else { + cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0x100); + } + return 0x100; // it is a size, not a retcode + } return 0; } @@ -462,9 +552,16 @@ int PS4_SYSV_ABI sceGnmInsertDingDongMarker() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmInsertPopMarker() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 6)) { + cmdbuf = WritePacket( + cmdbuf, PM4ShaderType::ShaderGraphics, + static_cast(PM4CmdNop::PayloadType::DebugMarkerPop), 0u, 0u, 0u, 0u); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmInsertPushColorMarker() { @@ -472,9 +569,25 @@ int PS4_SYSV_ABI sceGnmInsertPushColorMarker() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmInsertPushMarker() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marker) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && marker) { + const auto len = std::strlen(marker); + const u32 packet_size = ((len + 8) >> 2) + ((len + 0xc) >> 3); + if (packet_size + 2 == size) { + 
auto* nop = reinterpret_cast(cmdbuf); + nop->header = + PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics}; + nop->data_block[0] = static_cast(PM4CmdNop::PayloadType::DebugMarkerPush); + const auto marker_len = len + 1; + std::memcpy(&nop->data_block[1], marker, marker_len); + std::memset(reinterpret_cast(&nop->data_block[1]) + marker_len, 0, + packet_size * 4 - marker_len); + return ORBIS_OK; + } + } + return -1; } int PS4_SYSV_ABI sceGnmInsertSetColorMarker() { @@ -687,38 +800,36 @@ int PS4_SYSV_ABI sceGnmSetPsShader() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdBuffer, u32 numDwords, const u32* psRegs) { - if (!cmdBuffer || numDwords <= 0x27) { +int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdbuf, u32 size, const u32* ps_regs) { + if (!cmdbuf || size <= 0x27) { return -1; } - if (!psRegs) { - cmdBuffer = PM4CmdSetData::SetShReg(cmdBuffer, 8u, 0u, - 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x203u, 0u); // DB_SHADER_CONTROL - cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x8fu, 0xfu); // CB_SHADER_MASK - *(u64*)cmdBuffer = 0xc01c1000; + if (!ps_regs) { + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, 0u, + 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x203u, 0u); // DB_SHADER_CONTROL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, 0xfu); // CB_SHADER_MASK + *(u64*)cmdbuf = 0xc01c1000; } else { - if (psRegs[1] != 0) { + if (ps_regs[1] != 0) { LOG_ERROR(Lib_GnmDriver, "Invalid shader address."); return -1; } - u32* start = cmdBuffer; - cmdBuffer = PM4CmdSetData::SetShReg(cmdBuffer, 8u, psRegs[0], - 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdBuffer = PM4CmdSetData::SetShReg( - cmdBuffer, 10u, psRegs[2], - psRegs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 - cmdBuffer = PM4CmdSetData::SetContextReg( - cmdBuffer, 0x1c4u, psRegs[4], psRegs[5]); // 
SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT - cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b3u, psRegs[6], - psRegs[7]); // SPI_PS_INPUT_ENA - cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b6u, psRegs[8]); // SPI_PS_IN_CONTROL - cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b8u, psRegs[9]); // SPI_BARYC_CNTL - cmdBuffer = - PM4CmdSetData::SetContextReg(cmdBuffer, 0x203u, psRegs[10]); // DB_SHADER_CONTROL - cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x8fu, psRegs[11]); // CB_SHADER_MASK - *(u64*)cmdBuffer = 0xc00a1000; + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], + 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS + cmdbuf = PM4CmdSetData::SetShReg( + cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = PM4CmdSetData::SetContextReg( + cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], + ps_regs[7]); // SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b6u, ps_regs[8]); // SPI_PS_IN_CONTROL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b8u, ps_regs[9]); // SPI_BARYC_CNTL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x203u, ps_regs[10]); // DB_SHADER_CONTROL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, ps_regs[11]); // CB_SHADER_MASK + *(u64*)cmdbuf = 0xc00a1000; } return ORBIS_OK; } @@ -753,14 +864,7 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u64 size, const u32* vs_regs, u32 shader_modifier) { - LOG_INFO(Lib_GnmDriver, - "(STUBBED) called cmd_buffer = 0x{:x} size = {} shader_modifier = {} vs_reg0 = " - "0x{:x} vs_reg1 = 0x{:x} vs_reg2 = 0x{:x} vs_reg3 = 0x{:x} vs_reg4 = 0x{:x} vs_reg5 = " - "0x{:x} vs_reg6 = 0x{:x}", - reinterpret_cast(cmdbuf), size, shader_modifier, vs_regs[0], vs_regs[1], - vs_regs[2], vs_regs[3], vs_regs[4], vs_regs[5], 
vs_regs[6]); - +s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) { if (!cmdbuf || size <= 0x1c) { return -1; } @@ -786,7 +890,8 @@ int PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u64 size, const u32* vs_regs, u3 cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT - *(u64*)cmdbuf = 0xc00a1000; + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } @@ -1073,18 +1178,85 @@ int PS4_SYSV_ABI sceGnmUpdatePsShader() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmUpdatePsShader350() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmUpdatePsShader350(u32* cmdbuf, u32 size, const u32* ps_regs) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size <= 0x27) { + return -1; + } + if (!ps_regs) { + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, 0u, + 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0203u, + 0u); // DB_SHADER_CONTROL update + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, 0xfu); // CB_SHADER_MASK + + WriteTrailingNop<0x1d>(cmdbuf); + } else { + if (ps_regs[1] != 0) { + LOG_ERROR(Lib_GnmDriver, "Invalid shader address."); + return -1; + } + + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], + 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS + cmdbuf = PM4CmdSetData::SetShReg( + cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = WritePacket( + cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c4u, ps_regs[4], + ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT update + cmdbuf = WritePacket( + cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b3u, ps_regs[6], + ps_regs[7]); // SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR update + cmdbuf = 
WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b6u, + ps_regs[8]); // SPI_PS_IN_CONTROL update + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b8u, + ps_regs[9]); // SPI_BARYC_CNTL update + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0203u, + ps_regs[10]); // DB_SHADER_CONTROL update + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e008fu, + ps_regs[11]); // CB_SHADER_MASK update + + WriteTrailingNop<11>(cmdbuf); + } return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmd, u64 size, const u32* vs_regs, u32 shader_modifier) { - LOG_INFO(Lib_GnmDriver, - "(STUBBED) called cmd_buffer = 0x{:x} size = {} shader_modifier = {} vs_reg0 = " - "0x{:x} vs_reg1 = 0x{:x} vs_reg2 = 0x{:x} vs_reg3 = 0x{:x} vs_reg4 = 0x{:x} vs_reg5 = " - "0x{:x} vs_reg6 = 0x{:x}", - reinterpret_cast(cmd), size, shader_modifier, vs_regs[0], vs_regs[1], - vs_regs[2], vs_regs[3], vs_regs[4], vs_regs[5], vs_regs[6]); +s32 PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, + u32 shader_modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size <= 0x1c) { + return -1; + } + + if (!vs_regs) { + LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument"); + return -1; + } + + if (shader_modifier & 0xfcfffc3f) { + LOG_ERROR(Lib_GnmDriver, "Invalid modifier mask"); + return -1; + } + + if (vs_regs[1] != 0) { + LOG_ERROR(Lib_GnmDriver, "Invalid shader address"); + return -1; + } + + const u32 var = shader_modifier == 0 ? 
vs_regs[2] : (vs_regs[2] & 0xfcfffc3f | shader_modifier); + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0207u, + vs_regs[6]); // PA_CL_VS_OUT_CNTL update + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01b1u, + vs_regs[4]); // SPI_VS_OUT_CONFIG update + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c3u, + vs_regs[5]); // SPI_SHADER_POS_FORMAT update + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index d5059841..34d9b256 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -14,7 +14,8 @@ namespace Libraries::GnmDriver { int PS4_SYSV_ABI sceGnmAddEqEvent(); int PS4_SYSV_ABI sceGnmAreSubmitsAllowed(); int PS4_SYSV_ABI sceGnmBeginWorkload(); -int PS4_SYSV_ABI sceGnmComputeWaitOnAddress(); +s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask, + u32 cmp_func, u32 ref); int PS4_SYSV_ABI sceGnmComputeWaitSemaphore(); int PS4_SYSV_ABI sceGnmCreateWorkloadStream(); int PS4_SYSV_ABI sceGnmDebuggerGetAddressWatch(); @@ -32,25 +33,27 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream(); int PS4_SYSV_ABI sceGnmDingDong(); int PS4_SYSV_ABI sceGnmDingDongForWorkload(); int PS4_SYSV_ABI sceGnmDisableMipStatsReport(); -int PS4_SYSV_ABI sceGnmDispatchDirect(); -int PS4_SYSV_ABI sceGnmDispatchIndirect(); +s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y, + u32 threads_z, u32 flags); +s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags); int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(); -int PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(); -s32 PS4_SYSV_ABI 
sceGnmDrawIndex(u32* cmd, u64 size, u32 index_count, uintptr_t index_addr, +u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size); +s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr, u32 flags, u32 type); -int PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags); +s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags); int PS4_SYSV_ABI sceGnmDrawIndexIndirect(); int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(); int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(); int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced(); -int PS4_SYSV_ABI sceGnmDrawIndexOffset(); +s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count, + u32 flags); int PS4_SYSV_ABI sceGnmDrawIndirect(); int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti(); int PS4_SYSV_ABI sceGnmDrawIndirectMulti(); int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(); int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(); int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(); -int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmd, u64 size); +u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size); int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(); int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(); int PS4_SYSV_ABI sceGnmDrawOpaqueAuto(); @@ -95,9 +98,9 @@ int PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress(); int PS4_SYSV_ABI sceGnmGpuPaDebugEnter(); int PS4_SYSV_ABI sceGnmGpuPaDebugLeave(); int PS4_SYSV_ABI sceGnmInsertDingDongMarker(); -int PS4_SYSV_ABI sceGnmInsertPopMarker(); +s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size); int PS4_SYSV_ABI sceGnmInsertPushColorMarker(); -int PS4_SYSV_ABI sceGnmInsertPushMarker(); +s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marker); int PS4_SYSV_ABI sceGnmInsertSetColorMarker(); int PS4_SYSV_ABI 
sceGnmInsertSetMarker(); int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker(); @@ -140,14 +143,14 @@ int PS4_SYSV_ABI sceGnmSetGsShader(); int PS4_SYSV_ABI sceGnmSetHsShader(); int PS4_SYSV_ABI sceGnmSetLsShader(); int PS4_SYSV_ABI sceGnmSetPsShader(); -int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdBuffer, u32 numDwords, const u32* psRegs); +int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdbuf, u32 size, const u32* ps_regs); int PS4_SYSV_ABI sceGnmSetResourceRegistrationUserMemory(); int PS4_SYSV_ABI sceGnmSetResourceUserData(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance(); int PS4_SYSV_ABI sceGnmSetupMipStatsReport(); int PS4_SYSV_ABI sceGnmSetVgtControl(); -int PS4_SYSV_ABI sceGnmSetVsShader(u32* cmd, u64 size, const u32* vs_regs, u32 shader_modifier); +s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier); int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier(); int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers(); int PS4_SYSV_ABI sceGnmSpmEndSpm(); @@ -189,8 +192,9 @@ int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2(); int PS4_SYSV_ABI sceGnmSqttWaitForEvent(); int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(); int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(); -int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes, - void* ccbGpuAddrs[], u32* ccbSizesInBytes); +int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes); int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(); int PS4_SYSV_ABI sceGnmSubmitDone(); int PS4_SYSV_ABI sceGnmUnmapComputeQueue(); @@ -200,8 +204,9 @@ int PS4_SYSV_ABI sceGnmUnregisterResource(); int PS4_SYSV_ABI sceGnmUpdateGsShader(); int PS4_SYSV_ABI sceGnmUpdateHsShader(); int PS4_SYSV_ABI sceGnmUpdatePsShader(); -int PS4_SYSV_ABI sceGnmUpdatePsShader350(); -int PS4_SYSV_ABI 
sceGnmUpdateVsShader(u32* cmd, u64 size, const u32* vs_regs, u32 shader_modifier); +s32 PS4_SYSV_ABI sceGnmUpdatePsShader350(u32* cmdbuf, u32 size, const u32* ps_regs); +s32 PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, + u32 shader_modifier); int PS4_SYSV_ABI sceGnmValidateCommandBuffers(); int PS4_SYSV_ABI sceGnmValidateDisableDiagnostics(); int PS4_SYSV_ABI sceGnmValidateDisableDiagnostics2(); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index e57226ed..679cab90 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -26,20 +26,20 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { break; case PM4ItOpcode::SetContextReg: { auto* set_data = reinterpret_cast(header); - std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->regOffset], header + 2, - (count - 1) * sizeof(u32)); + std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset], + header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetShReg: { auto* set_data = reinterpret_cast(header); - std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->regOffset], header + 2, + std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetUconfigReg: { auto* set_data = reinterpret_cast(header); - std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->regOffset], header + 2, - (count - 1) * sizeof(u32)); + std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset], + header + 2, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::IndexType: { @@ -49,11 +49,11 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { } case PM4ItOpcode::DrawIndex2: { auto* draw_index = reinterpret_cast(header); - regs.max_index_size = draw_index->maxSize; - regs.index_base_address.base_addr_lo = draw_index->indexBaseLo; - 
regs.index_base_address.base_addr_hi.Assign(draw_index->indexBaseHi); - regs.num_indices = draw_index->indexCount; - regs.draw_initiator = draw_index->drawInitiator; + regs.max_index_size = draw_index->max_size; + regs.index_base_address.base_addr_lo = draw_index->index_base_lo; + regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); + regs.num_indices = draw_index->index_count; + regs.draw_initiator = draw_index->draw_initiator; // rasterizer->DrawIndex(); break; } @@ -66,8 +66,8 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { } case PM4ItOpcode::EventWriteEop: { auto* event_write = reinterpret_cast(header); - const InterruptSelect irq_sel = event_write->intSel; - const DataSelect data_sel = event_write->dataSel; + const InterruptSelect irq_sel = event_write->int_sel; + const DataSelect data_sel = event_write->data_sel; ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64); *event_write->Address() = event_write->DataQWord(); break; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 0267a5b2..eb258dc3 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -39,7 +39,7 @@ union PM4Type3Header { PM4Predicate pred = PM4Predicate::PredDisable) { raw = 0; predicate.Assign(pred); - shaderType.Assign(stype); + shader_type.Assign(stype); opcode.Assign(code); count.Assign(num_words_min_one); type.Assign(3); @@ -50,10 +50,10 @@ union PM4Type3Header { } u32 raw; - BitField<0, 1, PM4Predicate> predicate; ///< Predicated version of packet when set - BitField<1, 1, PM4ShaderType> shaderType; ///< 0: Graphics, 1: Compute Shader - BitField<8, 8, PM4ItOpcode> opcode; ///< IT opcode - BitField<16, 14, u32> count; ///< Number of DWORDs - 1 in the information body. 
+ BitField<0, 1, PM4Predicate> predicate; ///< Predicated version of packet when set + BitField<1, 1, PM4ShaderType> shader_type; ///< 0: Graphics, 1: Compute Shader + BitField<8, 8, PM4ItOpcode> opcode; ///< IT opcode + BitField<16, 14, u32> count; ///< Number of DWORDs - 1 in the information body. BitField<30, 2, u32> type; ///< Packet identifier. It should be 3 for type 3 packets }; @@ -64,42 +64,55 @@ union PM4Header { BitField<30, 2, u32> type; }; -template -constexpr u32* Write(u32* cmdbuf, PM4ShaderType type, Args... data) { - // Write the PM4 header. - PM4Type3Header header{opcode, sizeof...(Args) - 1, type}; +// Write the PM4 header +template +constexpr u32* WriteHeader(u32* cmdbuf, u32 size, + PM4ShaderType type = PM4ShaderType::ShaderGraphics, + PM4Predicate predicate = PM4Predicate::PredDisable) { + PM4Type3Header header{opcode, size - 1, type, predicate}; std::memcpy(cmdbuf, &header, sizeof(header)); + return ++cmdbuf; +} - // Write arguments +// Write arguments +template +constexpr u32* WriteBody(u32* cmdbuf, Args... data) { const std::array args{data...}; - std::memcpy(++cmdbuf, args.data(), sizeof(args)); + std::memcpy(cmdbuf, args.data(), sizeof(args)); cmdbuf += args.size(); return cmdbuf; } +template +constexpr u32* WritePacket(u32* cmdbuf, PM4ShaderType type, Args... 
data) { + cmdbuf = WriteHeader(cmdbuf, sizeof...(Args), type); + cmdbuf = WriteBody(cmdbuf, data...); + return cmdbuf; +} + union ContextControlEnable { u32 raw; - BitField<0, 1, u32> enableSingleCntxConfigReg; ///< single context config reg - BitField<1, 1, u32> enableMultiCntxRenderReg; ///< multi context render state reg - BitField<15, 1, u32> enableUserConfigReg__CI; ///< User Config Reg on CI(reserved for SI) - BitField<16, 1, u32> enableGfxSHReg; ///< Gfx SH Registers - BitField<24, 1, u32> enableCSSHReg; ///< CS SH Registers - BitField<31, 1, u32> enableDw; ///< DW enable + BitField<0, 1, u32> enable_single_cntx_config_reg; ///< single context config reg + BitField<1, 1, u32> enable_multi_cntx_render_reg; ///< multi context render state reg + BitField<15, 1, u32> enable_user_config_reg__CI; ///< User Config Reg on CI(reserved for SI) + BitField<16, 1, u32> enable_gfx_sh_reg; ///< Gfx SH Registers + BitField<24, 1, u32> enable_cs_sh_reg; ///< CS SH Registers + BitField<31, 1, u32> enable_dw; ///< DW enable }; struct PM4CmdContextControl { PM4Type3Header header; - ContextControlEnable loadControl; ///< Enable bits for loading - ContextControlEnable shadowEnable; ///< Enable bits for shadowing + ContextControlEnable load_control; ///< Enable bits for loading + ContextControlEnable shadow_enable; ///< Enable bits for shadowing }; union LoadAddressHigh { u32 raw; BitField<0, 16, u32> - addrHi; ///< bits for the block in Memory from where the CP will fetch the state + addr_hi; ///< bits for the block in Memory from where the CP will fetch the state BitField<31, 1, u32> - waitIdle; ///< if set the CP will wait for the graphics pipe to be idle by writing - ///< to the GRBM Wait Until register with "Wait for 3D idle" + wait_idle; ///< if set the CP will wait for the graphics pipe to be idle by writing + ///< to the GRBM Wait Until register with "Wait for 3D idle" }; /** @@ -110,12 +123,12 @@ union LoadAddressHigh { */ struct PM4CmdLoadData { PM4Type3Header header; - 
u32 addrLo; ///< low 32 address bits for the block in memory from where the CP will fetch the - ///< state - LoadAddressHigh addrHi; - u32 regOffset; ///< offset in DWords from the register base address - u32 numDwords; ///< number of DWords that the CP will fetch and write into the chip. A value of - ///< zero will fetch nothing + u32 addr_lo; ///< low 32 address bits for the block in memory from where the CP will fetch the + ///< state + LoadAddressHigh addr_hi; + u32 reg_offset; ///< offset in DWords from the register base address + u32 num_dwords; ///< number of DWords that the CP will fetch and write into the chip. A value of + ///< zero will fetch nothing }; enum class LoadDataIndex : u32 { @@ -131,8 +144,8 @@ enum class LoadDataFormat : u32 { union LoadAddressLow { u32 raw; BitField<0, 1, LoadDataIndex> index; - BitField<2, 30, u32> addrLo; ///< bits for the block in Memory from where the CP will fetch the - ///< state. DWORD aligned + BitField<2, 30, u32> addr_lo; ///< bits for the block in Memory from where the CP will fetch the + ///< state. DWORD aligned }; /** @@ -142,16 +155,16 @@ union LoadAddressLow { */ struct PM4CmdLoadDataIndex { PM4Type3Header header; - LoadAddressLow addrLo; ///< low 32 address bits for the block in memory from where the CP will - ///< fetch the state - u32 addrOffset; ///< addrLo.index = 1 Indexed mode + LoadAddressLow addr_lo; ///< low 32 address bits for the block in memory from where the CP will + ///< fetch the state + u32 addr_offset; ///< addrLo.index = 1 Indexed mode union { - BitField<0, 16, u32> regOffset; ///< offset in DWords from the register base address - BitField<31, 1, LoadDataFormat> dataFormat; + BitField<0, 16, u32> reg_offset; ///< offset in DWords from the register base address + BitField<31, 1, LoadDataFormat> data_format; u32 raw; }; - u32 numDwords; ///< Number of DWords that the CP will fetch and write - ///< into the chip. 
A value of zero will fetch nothing + u32 num_dwords; ///< Number of DWords that the CP will fetch and write + ///< into the chip. A value of zero will fetch nothing }; /** @@ -168,52 +181,62 @@ struct PM4CmdSetData { PM4Type3Header header; union { u32 raw; - BitField<0, 16, u32> regOffset; ///< Offset in DWords from the register base address - BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+ - ///< Program to zero for other opcodes and on SI + BitField<0, 16, u32> reg_offset; ///< Offset in DWords from the register base address + BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+ + ///< Program to zero for other opcodes and on SI }; template static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) { - return Write(cmdbuf, type, data...); + return WritePacket(cmdbuf, type, data...); } template static constexpr u32* SetShReg(u32* cmdbuf, Args... data) { - return Write(cmdbuf, type, data...); + return WritePacket(cmdbuf, type, data...); } }; struct PM4CmdNop { PM4Type3Header header; + u32 data_block[0]; + + enum class PayloadType : u32 { + DebugMarkerPush = 0x68750001, ///< Begin of GPU event scope + DebugMarkerPop = 0x68750002, ///< End of GPU event scope + SetVsharpInUdata = 0x68750004, ///< Indicates that V# will be set in the next packet + SetTsharpInUdata = 0x68750005, ///< Indicates that T# will be set in the next packet + SetSsharpInUdata = 0x68750006, ///< Indicates that S# will be set in the next packet + DebugColorMarkerPush = 0x6875000e, ///< Begin of GPU event scope with color + }; }; struct PM4CmdDrawIndexOffset2 { PM4Type3Header header; - u32 maxSize; ///< Maximum number of indices - u32 indexOffset; ///< Zero based starting index number in the index buffer - u32 indexCount; ///< number of indices in the Index Buffer - u32 drawInitiator; ///< draw Initiator Register + u32 max_size; ///< Maximum number of indices + u32 index_offset; ///< Zero based starting index number in the index buffer + u32 index_count; ///< 
number of indices in the Index Buffer + u32 draw_initiator; ///< draw Initiator Register }; struct PM4CmdDrawIndex2 { PM4Type3Header header; - u32 maxSize; ///< maximum number of indices - u32 indexBaseLo; ///< base Address Lo [31:1] of Index Buffer - ///< (Word-Aligned). Written to the VGT_DMA_BASE register. - u32 indexBaseHi; ///< base Address Hi [39:32] of Index Buffer. - ///< Written to the VGT_DMA_BASE_HI register - u32 indexCount; ///< number of indices in the Index Buffer. - ///< Written to the VGT_NUM_INDICES register. - u32 drawInitiator; ///< written to the VGT_DRAW_INITIATOR register + u32 max_size; ///< maximum number of indices + u32 index_base_lo; ///< base Address Lo [31:1] of Index Buffer + ///< (Word-Aligned). Written to the VGT_DMA_BASE register. + u32 index_base_hi; ///< base Address Hi [39:32] of Index Buffer. + ///< Written to the VGT_DMA_BASE_HI register + u32 index_count; ///< number of indices in the Index Buffer. + ///< Written to the VGT_NUM_INDICES register. + u32 draw_initiator; ///< written to the VGT_DRAW_INITIATOR register }; struct PM4CmdDrawIndexType { PM4Type3Header header; union { u32 raw; - BitField<0, 2, u32> indexType; ///< Select 16 Vs 32bit index - BitField<2, 2, u32> swapMode; ///< DMA swap mode + BitField<0, 2, u32> index_type; ///< Select 16 Vs 32bit index + BitField<2, 2, u32> swap_mode; ///< DMA swap mode }; }; @@ -241,25 +264,25 @@ struct PM4CmdEventWriteEop { PM4Type3Header header; union { u32 event_control; - BitField<0, 6, u32> eventType; ///< Event type written to VGT_EVENT_INITIATOR - BitField<8, 4, u32> eventIndex; ///< Event index + BitField<0, 6, u32> event_type; ///< Event type written to VGT_EVENT_INITIATOR + BitField<8, 4, u32> event_index; ///< Event index }; - u32 addressLo; + u32 address_lo; union { u32 data_control; - BitField<0, 16, u32> addressHi; ///< High bits of address - BitField<24, 2, InterruptSelect> intSel; ///< Selects interrupt action for end-of-pipe - BitField<29, 3, DataSelect> dataSel; ///< 
Selects source of data + BitField<0, 16, u32> address_hi; ///< High bits of address + BitField<24, 2, InterruptSelect> int_sel; ///< Selects interrupt action for end-of-pipe + BitField<29, 3, DataSelect> data_sel; ///< Selects source of data }; - u32 dataLo; ///< Value that will be written to memory when event occurs - u32 dataHi; ///< Value that will be written to memory when event occurs + u32 data_lo; ///< Value that will be written to memory when event occurs + u32 data_hi; ///< Value that will be written to memory when event occurs u64* Address() const { - return reinterpret_cast(addressLo | u64(addressHi) << 32); + return reinterpret_cast(address_lo | u64(address_hi) << 32); } u64 DataQWord() const { - return dataLo | u64(dataHi) << 32; + return data_lo | u64(data_hi) << 32; } }; @@ -287,4 +310,19 @@ struct PM4DmaData { u32 command; }; +struct PM4CmdWaitRegMem { + PM4Type3Header header; + union { + BitField<0, 3, u32> function; + BitField<4, 1, u32> mem_space; + BitField<8, 1, u32> engine; + u32 raw; + }; + u32 poll_addr_lo; + u32 poll_addr_hi; + u32 ref; + u32 mask; + u32 poll_interval; +}; + } // namespace AmdGpu diff --git a/src/video_core/amdgpu/pm4_opcodes.h b/src/video_core/amdgpu/pm4_opcodes.h index 14564f95..2772716a 100644 --- a/src/video_core/amdgpu/pm4_opcodes.h +++ b/src/video_core/amdgpu/pm4_opcodes.h @@ -36,6 +36,7 @@ enum class PM4ItOpcode : u32 { WriteData = 0x37, DrawIndexIndirectMulti = 0x38, MemSemaphore = 0x39, + WaitRegMem = 0x3c, IndirectBuffer = 0x3F, CondIndirectBuffer = 0x3F, CopyData = 0x40, @@ -48,6 +49,7 @@ enum class PM4ItOpcode : u32 { PremableCntl = 0x4A, DmaData = 0x50, ContextRegRmw = 0x51, + Unknown58 = 0x58, LoadShReg = 0x5F, LoadConfigReg = 0x60, LoadContextReg = 0x61,