diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ff00d6f..104d4396 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -472,6 +472,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/pm4_cmds.h src/video_core/amdgpu/pm4_opcodes.h src/video_core/amdgpu/resource.h + src/video_core/amdgpu/default_context.cpp src/video_core/buffer_cache/buffer.cpp src/video_core/buffer_cache/buffer.h src/video_core/buffer_cache/buffer_cache.cpp diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index c2ee6d59..95821a03 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -55,6 +55,10 @@ static constexpr auto HwInitPacketSize = 0x100u; // clang-format off static constexpr std::array InitSequence{ + // A fake preamble to mimic context reset sent by FW + 0xc0001200u, 0u, // IT_CLEAR_STATE + + // Actual init state sequence 0xc0017600u, 0x216u, 0xffffffffu, 0xc0017600u, 0x217u, 0xffffffffu, 0xc0017600u, 0x215u, 0u, @@ -94,9 +98,13 @@ static constexpr std::array InitSequence{ 0xc0036900u, 0x295u, 0x100u, 0x100u, 4u, 0xc0017900u, 0x200u, 0xe0000000u, }; -static_assert(InitSequence.size() == 0x73); +static_assert(InitSequence.size() == 0x73 + 2); static constexpr std::array InitSequence175{ + // A fake preamble to mimic context reset sent by FW + 0xc0001200u, 0u, // IT_CLEAR_STATE + + // Actual init state sequence 0xc0017600u, 0x216u, 0xffffffffu, 0xc0017600u, 0x217u, 0xffffffffu, 0xc0017600u, 0x215u, 0u, @@ -136,9 +144,13 @@ static constexpr std::array InitSequence175{ 0xc0036900u, 0x295u, 0x100u, 0x100u, 4u, 0xc0017900u, 0x200u, 0xe0000000u, }; -static_assert(InitSequence175.size() == 0x73); +static_assert(InitSequence175.size() == 0x73 + 2); static constexpr std::array InitSequence200{ + // A fake preamble to mimic context reset sent by FW + 0xc0001200u, 0u, // IT_CLEAR_STATE + + // Actual init state sequence 0xc0017600u, 0x216u, 0xffffffffu, 0xc0017600u, 0x217u, 0xffffffffu, 0xc0017600u, 0x215u, 0u, @@ -179,9 +191,13 @@ static constexpr std::array InitSequence200{ 0xc0036900u, 0x295u, 0x100u, 0x100u, 4u, 0xc0017900u, 0x200u, 0xe0000000u, }; -static_assert(InitSequence200.size() == 0x76); +static_assert(InitSequence200.size() == 0x76 + 2); static constexpr std::array InitSequence350{ + // A fake preamble to mimic context reset sent by FW + 0xc0001200u, 0u, // IT_CLEAR_STATE + + // Actual init state sequence 0xc0017600u, 0x216u, 0xffffffffu, 0xc0017600u, 0x217u, 0xffffffffu, 0xc0017600u, 0x215u, 0u, @@ -224,7 +240,7 @@ static constexpr std::array InitSequence350{ 0xc0017900u, 0x200u, 0xe0000000u, 0xc0016900u, 0x2aau, 0xffu, }; -static_assert(InitSequence350.size() == 0x7c); +static_assert(InitSequence350.size() == 0x7c + 2); static constexpr std::array CtxInitSequence{ 0xc0012800u, 0x80000000u, 0x80000000u, @@ -735,11 +751,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) { cmdbuf = ClearContextState(cmdbuf); } - std::memcpy(cmdbuf, InitSequence.data(), InitSequence.size() * 4); - cmdbuf += InitSequence.size(); + std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4); + cmdbuf += InitSequence.size() - 2; const auto cmdbuf_left = - HwInitPacketSize - InitSequence.size() - (clear_state ? 0xc : 0) - 1; + HwInitPacketSize - (InitSequence.size() - 2) - (clear_state ? 0xc : 0) - 1; cmdbuf = WriteHeader(cmdbuf, cmdbuf_left); cmdbuf = WriteBody(cmdbuf, 0u); @@ -757,10 +773,10 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) { } cmdbuf = ClearContextState(cmdbuf); - std::memcpy(cmdbuf, InitSequence175.data(), InitSequence175.size() * 4); - cmdbuf += InitSequence175.size(); + std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4); + cmdbuf += InitSequence175.size() - 2; - constexpr auto cmdbuf_left = HwInitPacketSize - InitSequence175.size() - 0xc - 1; + constexpr auto cmdbuf_left = HwInitPacketSize - (InitSequence175.size() - 2) - 0xc - 1; WriteTrailingNop(cmdbuf); return HwInitPacketSize; @@ -778,11 +794,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) { cmdbuf = ClearContextState(cmdbuf); } - std::memcpy(cmdbuf, InitSequence200.data(), InitSequence200.size() * 4); - cmdbuf += InitSequence200.size(); + std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4); + cmdbuf += InitSequence200.size() - 2; const auto cmdbuf_left = - HwInitPacketSize - InitSequence200.size() - (clear_state ? 0xc : 0) - 1; + HwInitPacketSize - (InitSequence200.size() - 2) - (clear_state ? 0xc : 0) - 1; cmdbuf = WriteHeader(cmdbuf, cmdbuf_left); cmdbuf = WriteBody(cmdbuf, 0u); @@ -804,11 +820,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) { cmdbuf = ClearContextState(cmdbuf); } - std::memcpy(cmdbuf, InitSequence350.data(), InitSequence350.size() * 4); - cmdbuf += InitSequence350.size(); + std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4); + cmdbuf += InitSequence350.size() - 2; const auto cmdbuf_left = - HwInitPacketSize - InitSequence350.size() - (clear_state ? 0xc : 0) - 1; + HwInitPacketSize - (InitSequence350.size() - 2) - (clear_state ? 0xc : 0) - 1; cmdbuf = WriteHeader(cmdbuf, cmdbuf_left); cmdbuf = WriteBody(cmdbuf, 0u); @@ -1743,7 +1759,7 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3 return -1; } - const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f | shader_modifier); + const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f) | shader_modifier; cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL diff --git a/src/video_core/amdgpu/default_context.cpp b/src/video_core/amdgpu/default_context.cpp new file mode 100644 index 00000000..b8b34bde --- /dev/null +++ b/src/video_core/amdgpu/default_context.cpp @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/types.h" +#include "video_core/amdgpu/liverpool.h" + +#include + +namespace AmdGpu { + +// The following values are taken from fpPS4: +// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410 +// +const std::array Liverpool::reg_array_default{ + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, + 0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u, + 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u, 0x40004000u, 0x80000000u, + 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, + 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, + 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, + 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, + 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, + 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, + 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, + 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, + 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x2a00161au, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000002u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00090000u, + 0x00000004u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000100u, 0x00000080u, 0x00000002u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x000000ffu, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00001000u, 0x00000000u, 0x00000005u, 0x3f800000u, + 0x3f800000u, 0x3f800000u, 0x3f800000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x0000000eu, 0x00000010u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, +}; + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index dce2d4b4..87cad052 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -216,6 +216,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span reg_array_default; // See for a comment in context reg parsing code union CbDbExtent {