Merge pull request #212 from shadps4-emu/missing_gnm
More GnmDriver functions implementation
This commit is contained in:
commit
52ff825171
|
@ -24,6 +24,20 @@ using namespace AmdGpu;
|
||||||
|
|
||||||
static std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
static std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
||||||
|
|
||||||
|
enum ShaderStages : u32 {
|
||||||
|
Cs,
|
||||||
|
Ps,
|
||||||
|
Vs,
|
||||||
|
Gs,
|
||||||
|
Es,
|
||||||
|
Hs,
|
||||||
|
Ls,
|
||||||
|
|
||||||
|
Max
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr std::array indirect_sgpr_offsets{0u, 0u, 0x4cu, 0u, 0xccu, 0u, 0x14cu};
|
||||||
|
|
||||||
// In case of precise gnm driver emulation we need to send a bunch of HW-specific
|
// In case of precise gnm driver emulation we need to send a bunch of HW-specific
|
||||||
// initialization commands. It may slowdown development at early stage as their
|
// initialization commands. It may slowdown development at early stage as their
|
||||||
// support is not important and can be ignored for a while.
|
// support is not important and can be ignored for a while.
|
||||||
|
@ -347,10 +361,30 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirect() {
|
s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage,
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
u32 vertex_sgpr_offset, u32 instance_vgpr_offset,
|
||||||
|
u32 flags) {
|
||||||
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
|
if (cmdbuf && (size == 9) && (shader_stage < ShaderStages::Max) &&
|
||||||
|
(vertex_sgpr_offset < 0x10u) && (instance_vgpr_offset < 0x10u)) {
|
||||||
|
|
||||||
|
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
|
||||||
|
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexIndirect>(
|
||||||
|
cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate);
|
||||||
|
|
||||||
|
const auto sgpr_offset = indirect_sgpr_offsets[shader_stage];
|
||||||
|
|
||||||
|
cmdbuf[0] = data_offset;
|
||||||
|
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||||
|
cmdbuf[2] = instance_vgpr_offset == 0 ? 0 : (instance_vgpr_offset & 0xffffu) + sgpr_offset;
|
||||||
|
cmdbuf[3] = 0;
|
||||||
|
|
||||||
|
WriteTrailingNop<3>(cmdbuf);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti() {
|
int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti() {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||||
|
@ -383,10 +417,29 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset,
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndirect() {
|
s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage,
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
u32 vertex_sgpr_offset, u32 instance_vgpr_offset, u32 flags) {
|
||||||
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
|
if (cmdbuf && (size == 9) && (shader_stage < ShaderStages::Max) &&
|
||||||
|
(vertex_sgpr_offset < 0x10u) && (instance_vgpr_offset < 0x10u)) {
|
||||||
|
|
||||||
|
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
|
||||||
|
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndirect>(cmdbuf, 4, PM4ShaderType::ShaderGraphics,
|
||||||
|
predicate);
|
||||||
|
|
||||||
|
const auto sgpr_offset = indirect_sgpr_offsets[shader_stage];
|
||||||
|
|
||||||
|
cmdbuf[0] = data_offset;
|
||||||
|
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||||
|
cmdbuf[2] = instance_vgpr_offset == 0 ? 0 : (instance_vgpr_offset & 0xffffu) + sgpr_offset;
|
||||||
|
cmdbuf[3] = 2; // auto index
|
||||||
|
|
||||||
|
WriteTrailingNop<3>(cmdbuf);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti() {
|
int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti() {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||||
|
@ -972,8 +1025,73 @@ s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32*
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() {
|
s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
u32 shader_modifier) {
|
||||||
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
|
if (shader_id > 1) {
|
||||||
|
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
|
||||||
|
return 0x8eee00ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
constexpr static std::array ps0_code alignas(256) = {
|
||||||
|
0xbeeb03ffu, 0x00000003u, // s_mov_b32 vcc_hi, $0x00000003
|
||||||
|
0x7e000280u, // v_mov_b32 v0, 0
|
||||||
|
0x5e000100u, // v_cvt_pkrtz_f16_f32 v0, v0, v0
|
||||||
|
0xbf800000u, // s_nop
|
||||||
|
0xf8001c0fu, 0x00000000u, // exp mrt0, v0, v0 compr vm done
|
||||||
|
0xbf810000u, // s_endpgm
|
||||||
|
|
||||||
|
// Binary header
|
||||||
|
0x5362724fu, 0x07726468u, 0x00002043u, 0u, 0xb0a45b2bu, 0x1d39766du, 0x72044b7bu, 0x0000000fu,
|
||||||
|
// PS regs
|
||||||
|
0x0fe000f0u, 0u, 0xc0000u, 4u, 0u, 4u, 2u, 2u, 0u, 0u, 0x10u, 0xfu, 0xcu, 0u, 0u, 0u,
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto shader0_addr = uintptr_t(ps0_code.data()); // Original address is 0xfe000f00
|
||||||
|
const static u32 ps0_regs[] = {
|
||||||
|
u32(shader0_addr >> 8), u32(shader0_addr >> 40), 0xc0000u, 4u, 0u, 4u, 2u, 2u, 0u, 0u, 0x10u, 0xfu, 0xcu};
|
||||||
|
|
||||||
|
constexpr static std::array ps1_code alignas(256) = {
|
||||||
|
0xbeeb03ffu, 0x00000003u, // s_mov_b32 vcc_hi, $0x00000003
|
||||||
|
0x7e040280u, // v_mov_b32 v2, 0
|
||||||
|
0xf8001803u, 0x02020202u, // exp mrt0, v2, v2, off, off vm done
|
||||||
|
0xbf810000u, // s_endpgm
|
||||||
|
|
||||||
|
// Binary header
|
||||||
|
0x5362724fu, 0x07726468u, 0x00001841u, 0x04080002u, 0x98b9cb94u, 0u, 0x6f130734u, 0x0000000fu,
|
||||||
|
// PS regs
|
||||||
|
0x0fe000f2u, 0u, 0x2000u, 0u, 0u, 2u, 2u, 2u, 0u, 0u, 0x10u, 3u, 0xcu,
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto shader1_addr = uintptr_t(ps1_code.data()); // Original address is 0xfe000f20
|
||||||
|
const static u32 ps1_regs[] = {
|
||||||
|
u32(shader1_addr >> 8), u32(shader1_addr >> 40), 0x2000u, 0u, 0u, 2u, 2u, 2u, 0u, 0u, 0x10u, 3u, 0xcu};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
const auto ps_regs = shader_id == 0 ? ps0_regs : ps1_regs;
|
||||||
|
|
||||||
|
// Normally the driver will do a call to `sceGnmSetPsShader350()`, but this function has
|
||||||
|
// a check for zero in the upper part of shader address. In our case, the address is a
|
||||||
|
// pointer to a stack memory, so the check will likely fail. To workaround it we will
|
||||||
|
// repeat set shader functionality here as it is trivial.
|
||||||
|
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0],
|
||||||
|
0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS
|
||||||
|
cmdbuf =
|
||||||
|
PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2],
|
||||||
|
ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c4u, ps_regs[4],
|
||||||
|
ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6],
|
||||||
|
ps_regs[7]); // SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b6u, ps_regs[8]); // SPI_PS_IN_CONTROL
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b8u, ps_regs[9]); // SPI_BARYC_CNTL
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x203u, ps_regs[10]); // DB_SHADER_CONTROL
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, ps_regs[11]); // CB_SHADER_MASK
|
||||||
|
|
||||||
|
WriteTrailingNop<11>(cmdbuf);
|
||||||
|
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -981,10 +1099,15 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
u32 shader_modifier) {
|
u32 shader_modifier) {
|
||||||
LOG_TRACE(Lib_GnmDriver, "called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
|
if (shader_id != 0) {
|
||||||
|
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
|
||||||
|
return 0x8eee00ff;
|
||||||
|
}
|
||||||
|
|
||||||
// A fullscreen triangle with one uv set
|
// A fullscreen triangle with one uv set
|
||||||
// clang-format off
|
// clang-format off
|
||||||
constexpr static std::array shader_code alignas(256) = {
|
constexpr static std::array shader_code alignas(256) = {
|
||||||
0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007
|
0xbeeb03ffu, 0x00000007u, // s_mov_b32 vcc_hi, $0x00000007
|
||||||
0x36020081u, // v_and_b32 v1, 1, v0
|
0x36020081u, // v_and_b32 v1, 1, v0
|
||||||
0x34020281u, // v_lshlrev_b32 v1, 1, v1
|
0x34020281u, // v_lshlrev_b32 v1, 1, v1
|
||||||
0x360000c2u, // v_and_b32 v0, -2, v0
|
0x360000c2u, // v_and_b32 v0, -2, v0
|
||||||
|
@ -999,9 +1122,9 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
0xbf810000u, // s_endpgm
|
0xbf810000u, // s_endpgm
|
||||||
|
|
||||||
// Binary header
|
// Binary header
|
||||||
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
|
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, 0x00000017u,
|
||||||
// VS regs
|
// VS regs
|
||||||
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u,
|
0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u,
|
||||||
};
|
};
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
|
@ -1009,10 +1132,6 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
const static u32 vs_regs[] = {
|
const static u32 vs_regs[] = {
|
||||||
u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7};
|
u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7};
|
||||||
|
|
||||||
if (shader_id != 0) {
|
|
||||||
return 0x8eee00ff;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has
|
// Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has
|
||||||
// a check for zero in the upper part of shader address. In our case, the address is a
|
// a check for zero in the upper part of shader address. In our case, the address is a
|
||||||
// pointer to a stack memory, so the check will likely fail. To workaround it we will
|
// pointer to a stack memory, so the check will likely fail. To workaround it we will
|
||||||
|
|
|
@ -44,13 +44,16 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size);
|
||||||
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
|
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
|
||||||
u32 flags, u32 type);
|
u32 flags, u32 type);
|
||||||
s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags);
|
s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags);
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirect();
|
s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage,
|
||||||
|
u32 vertex_sgpr_offset, u32 instance_vgpr_offset,
|
||||||
|
u32 flags);
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti();
|
int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti();
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti();
|
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti();
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced();
|
int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced();
|
||||||
s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count,
|
s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count,
|
||||||
u32 flags);
|
u32 flags);
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndirect();
|
s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage,
|
||||||
|
u32 vertex_sgpr_offset, u32 instance_vgpr_offset, u32 flags);
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti();
|
int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti();
|
||||||
int PS4_SYSV_ABI sceGnmDrawIndirectMulti();
|
int PS4_SYSV_ABI sceGnmDrawIndirectMulti();
|
||||||
int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState();
|
int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState();
|
||||||
|
@ -143,7 +146,8 @@ int PS4_SYSV_ABI sceGnmSdmaOpen();
|
||||||
s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, const u32* cs_regs);
|
s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, const u32* cs_regs);
|
||||||
s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs,
|
s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs,
|
||||||
u32 modifier);
|
u32 modifier);
|
||||||
int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader();
|
s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
|
u32 shader_modifier);
|
||||||
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier);
|
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier);
|
||||||
int PS4_SYSV_ABI sceGnmSetEsShader();
|
int PS4_SYSV_ABI sceGnmSetEsShader();
|
||||||
int PS4_SYSV_ABI sceGnmSetGsRingSizes();
|
int PS4_SYSV_ABI sceGnmSetGsRingSizes();
|
||||||
|
|
|
@ -253,6 +253,20 @@ struct PM4CmdDrawIndexAuto {
|
||||||
u32 draw_initiator;
|
u32 draw_initiator;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PM4CmdDrawIndirect {
|
||||||
|
PM4Type3Header header; ///< header
|
||||||
|
u32 data_offset; ///< DWORD aligned offset
|
||||||
|
union {
|
||||||
|
u32 dw2;
|
||||||
|
BitField<0, 16, u32> base_vtx_loc; ///< base vertex location
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
u32 dw3;
|
||||||
|
BitField<0, 16, u32> start_inst_loc; ///< start instance location
|
||||||
|
};
|
||||||
|
u32 draw_initiator; ///< Draw Initiator Register
|
||||||
|
};
|
||||||
|
|
||||||
enum class DataSelect : u32 {
|
enum class DataSelect : u32 {
|
||||||
None = 0,
|
None = 0,
|
||||||
Data32Low = 1,
|
Data32Low = 1,
|
||||||
|
|
Loading…
Reference in New Issue