GnmDriver: Clear context support (#567)
* gnmdriver: added support for gpu context reset * shader_recompiler: minor validation fixes * shader_recompiler: added `V_CMPX_GT_I32` * shader_recompiler: fix for crash on inline sampler access * compilation warnings and dead code elimination * amdgpu: fix for registers addressing * libraries: videoout: reduce logging pressure * shader_recompiler: fix for divergence scope detection
This commit is contained in:
parent
c2ddfe51e1
commit
b687ae5e34
|
@ -488,6 +488,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
||||||
src/video_core/amdgpu/pm4_cmds.h
|
src/video_core/amdgpu/pm4_cmds.h
|
||||||
src/video_core/amdgpu/pm4_opcodes.h
|
src/video_core/amdgpu/pm4_opcodes.h
|
||||||
src/video_core/amdgpu/resource.h
|
src/video_core/amdgpu/resource.h
|
||||||
|
src/video_core/amdgpu/default_context.cpp
|
||||||
src/video_core/buffer_cache/buffer.cpp
|
src/video_core/buffer_cache/buffer.cpp
|
||||||
src/video_core/buffer_cache/buffer.h
|
src/video_core/buffer_cache/buffer.h
|
||||||
src/video_core/buffer_cache/buffer_cache.cpp
|
src/video_core/buffer_cache/buffer_cache.cpp
|
||||||
|
|
|
@ -55,6 +55,10 @@ static constexpr auto HwInitPacketSize = 0x100u;
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
static constexpr std::array InitSequence{
|
static constexpr std::array InitSequence{
|
||||||
|
// A fake preamble to mimic context reset sent by FW
|
||||||
|
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||||
|
|
||||||
|
// Actual init state sequence
|
||||||
0xc0017600u, 0x216u, 0xffffffffu,
|
0xc0017600u, 0x216u, 0xffffffffu,
|
||||||
0xc0017600u, 0x217u, 0xffffffffu,
|
0xc0017600u, 0x217u, 0xffffffffu,
|
||||||
0xc0017600u, 0x215u, 0u,
|
0xc0017600u, 0x215u, 0u,
|
||||||
|
@ -94,9 +98,13 @@ static constexpr std::array InitSequence{
|
||||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||||
0xc0017900u, 0x200u, 0xe0000000u,
|
0xc0017900u, 0x200u, 0xe0000000u,
|
||||||
};
|
};
|
||||||
static_assert(InitSequence.size() == 0x73);
|
static_assert(InitSequence.size() == 0x73 + 2);
|
||||||
|
|
||||||
static constexpr std::array InitSequence175{
|
static constexpr std::array InitSequence175{
|
||||||
|
// A fake preamble to mimic context reset sent by FW
|
||||||
|
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||||
|
|
||||||
|
// Actual init state sequence
|
||||||
0xc0017600u, 0x216u, 0xffffffffu,
|
0xc0017600u, 0x216u, 0xffffffffu,
|
||||||
0xc0017600u, 0x217u, 0xffffffffu,
|
0xc0017600u, 0x217u, 0xffffffffu,
|
||||||
0xc0017600u, 0x215u, 0u,
|
0xc0017600u, 0x215u, 0u,
|
||||||
|
@ -136,9 +144,13 @@ static constexpr std::array InitSequence175{
|
||||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||||
0xc0017900u, 0x200u, 0xe0000000u,
|
0xc0017900u, 0x200u, 0xe0000000u,
|
||||||
};
|
};
|
||||||
static_assert(InitSequence175.size() == 0x73);
|
static_assert(InitSequence175.size() == 0x73 + 2);
|
||||||
|
|
||||||
static constexpr std::array InitSequence200{
|
static constexpr std::array InitSequence200{
|
||||||
|
// A fake preamble to mimic context reset sent by FW
|
||||||
|
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||||
|
|
||||||
|
// Actual init state sequence
|
||||||
0xc0017600u, 0x216u, 0xffffffffu,
|
0xc0017600u, 0x216u, 0xffffffffu,
|
||||||
0xc0017600u, 0x217u, 0xffffffffu,
|
0xc0017600u, 0x217u, 0xffffffffu,
|
||||||
0xc0017600u, 0x215u, 0u,
|
0xc0017600u, 0x215u, 0u,
|
||||||
|
@ -179,9 +191,13 @@ static constexpr std::array InitSequence200{
|
||||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||||
0xc0017900u, 0x200u, 0xe0000000u,
|
0xc0017900u, 0x200u, 0xe0000000u,
|
||||||
};
|
};
|
||||||
static_assert(InitSequence200.size() == 0x76);
|
static_assert(InitSequence200.size() == 0x76 + 2);
|
||||||
|
|
||||||
static constexpr std::array InitSequence350{
|
static constexpr std::array InitSequence350{
|
||||||
|
// A fake preamble to mimic context reset sent by FW
|
||||||
|
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||||
|
|
||||||
|
// Actual init state sequence
|
||||||
0xc0017600u, 0x216u, 0xffffffffu,
|
0xc0017600u, 0x216u, 0xffffffffu,
|
||||||
0xc0017600u, 0x217u, 0xffffffffu,
|
0xc0017600u, 0x217u, 0xffffffffu,
|
||||||
0xc0017600u, 0x215u, 0u,
|
0xc0017600u, 0x215u, 0u,
|
||||||
|
@ -224,7 +240,7 @@ static constexpr std::array InitSequence350{
|
||||||
0xc0017900u, 0x200u, 0xe0000000u,
|
0xc0017900u, 0x200u, 0xe0000000u,
|
||||||
0xc0016900u, 0x2aau, 0xffu,
|
0xc0016900u, 0x2aau, 0xffu,
|
||||||
};
|
};
|
||||||
static_assert(InitSequence350.size() == 0x7c);
|
static_assert(InitSequence350.size() == 0x7c + 2);
|
||||||
|
|
||||||
static constexpr std::array CtxInitSequence{
|
static constexpr std::array CtxInitSequence{
|
||||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||||
|
@ -735,11 +751,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||||
cmdbuf = ClearContextState(cmdbuf);
|
cmdbuf = ClearContextState(cmdbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::memcpy(cmdbuf, InitSequence.data(), InitSequence.size() * 4);
|
std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4);
|
||||||
cmdbuf += InitSequence.size();
|
cmdbuf += InitSequence.size() - 2;
|
||||||
|
|
||||||
const auto cmdbuf_left =
|
const auto cmdbuf_left =
|
||||||
HwInitPacketSize - InitSequence.size() - (clear_state ? 0xc : 0) - 1;
|
HwInitPacketSize - (InitSequence.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||||
|
|
||||||
|
@ -757,10 +773,10 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
cmdbuf = ClearContextState(cmdbuf);
|
cmdbuf = ClearContextState(cmdbuf);
|
||||||
std::memcpy(cmdbuf, InitSequence175.data(), InitSequence175.size() * 4);
|
std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4);
|
||||||
cmdbuf += InitSequence175.size();
|
cmdbuf += InitSequence175.size() - 2;
|
||||||
|
|
||||||
constexpr auto cmdbuf_left = HwInitPacketSize - InitSequence175.size() - 0xc - 1;
|
constexpr auto cmdbuf_left = HwInitPacketSize - (InitSequence175.size() - 2) - 0xc - 1;
|
||||||
WriteTrailingNop<cmdbuf_left>(cmdbuf);
|
WriteTrailingNop<cmdbuf_left>(cmdbuf);
|
||||||
|
|
||||||
return HwInitPacketSize;
|
return HwInitPacketSize;
|
||||||
|
@ -778,11 +794,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
|
||||||
cmdbuf = ClearContextState(cmdbuf);
|
cmdbuf = ClearContextState(cmdbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::memcpy(cmdbuf, InitSequence200.data(), InitSequence200.size() * 4);
|
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
|
||||||
cmdbuf += InitSequence200.size();
|
cmdbuf += InitSequence200.size() - 2;
|
||||||
|
|
||||||
const auto cmdbuf_left =
|
const auto cmdbuf_left =
|
||||||
HwInitPacketSize - InitSequence200.size() - (clear_state ? 0xc : 0) - 1;
|
HwInitPacketSize - (InitSequence200.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||||
|
|
||||||
|
@ -804,11 +820,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
|
||||||
cmdbuf = ClearContextState(cmdbuf);
|
cmdbuf = ClearContextState(cmdbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::memcpy(cmdbuf, InitSequence350.data(), InitSequence350.size() * 4);
|
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
|
||||||
cmdbuf += InitSequence350.size();
|
cmdbuf += InitSequence350.size() - 2;
|
||||||
|
|
||||||
const auto cmdbuf_left =
|
const auto cmdbuf_left =
|
||||||
HwInitPacketSize - InitSequence350.size() - (clear_state ? 0xc : 0) - 1;
|
HwInitPacketSize - (InitSequence350.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||||
|
|
||||||
|
@ -1743,7 +1759,7 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f | shader_modifier);
|
const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f) | shader_modifier;
|
||||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS
|
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS
|
||||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS
|
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS
|
||||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL
|
||||||
|
|
|
@ -185,14 +185,16 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) {
|
||||||
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_lock lock{port->port_mutex};
|
{
|
||||||
*status = port->flip_status;
|
std::unique_lock lock{port->port_mutex};
|
||||||
|
*status = port->flip_status;
|
||||||
|
}
|
||||||
|
|
||||||
LOG_INFO(Lib_VideoOut,
|
LOG_TRACE(Lib_VideoOut,
|
||||||
"count = {}, processTime = {}, tsc = {}, submitTsc = {}, flipArg = {}, gcQueueNum = "
|
"count = {}, processTime = {}, tsc = {}, submitTsc = {}, flipArg = {}, gcQueueNum = "
|
||||||
"{}, flipPendingNum = {}, currentBuffer = {}",
|
"{}, flipPendingNum = {}, currentBuffer = {}",
|
||||||
status->count, status->processTime, status->tsc, status->submitTsc, status->flipArg,
|
status->count, status->processTime, status->tsc, status->submitTsc, status->flipArg,
|
||||||
status->gcQueueNum, status->flipPendingNum, status->currentBuffer);
|
status->gcQueueNum, status->flipPendingNum, status->currentBuffer);
|
||||||
|
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
|
@ -179,6 +179,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||||
spv::ExecutionModel execution_model{};
|
spv::ExecutionModel execution_model{};
|
||||||
ctx.AddCapability(spv::Capability::Image1D);
|
ctx.AddCapability(spv::Capability::Image1D);
|
||||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||||
|
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||||
if (info.uses_fp16) {
|
if (info.uses_fp16) {
|
||||||
ctx.AddCapability(spv::Capability::Float16);
|
ctx.AddCapability(spv::Capability::Float16);
|
||||||
ctx.AddCapability(spv::Capability::Int16);
|
ctx.AddCapability(spv::Capability::Int16);
|
||||||
|
|
|
@ -405,6 +405,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
||||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||||
return spv::ImageFormat::Rg16f;
|
return spv::ImageFormat::Rg16f;
|
||||||
}
|
}
|
||||||
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
|
||||||
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm) {
|
||||||
|
return spv::ImageFormat::Rg16Snorm;
|
||||||
|
}
|
||||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
|
||||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
|
||||||
return spv::ImageFormat::Rg8;
|
return spv::ImageFormat::Rg8;
|
||||||
|
|
|
@ -21,8 +21,13 @@ struct Compare {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static IR::Condition MakeCondition(Opcode opcode) {
|
static IR::Condition MakeCondition(const GcnInst& inst) {
|
||||||
switch (opcode) {
|
if (inst.IsCmpx()) {
|
||||||
|
ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
|
||||||
|
return IR::Condition::Execnz;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (inst.opcode) {
|
||||||
case Opcode::S_CBRANCH_SCC0:
|
case Opcode::S_CBRANCH_SCC0:
|
||||||
return IR::Condition::Scc0;
|
return IR::Condition::Scc0;
|
||||||
case Opcode::S_CBRANCH_SCC1:
|
case Opcode::S_CBRANCH_SCC1:
|
||||||
|
@ -37,7 +42,6 @@ static IR::Condition MakeCondition(Opcode opcode) {
|
||||||
return IR::Condition::Execnz;
|
return IR::Condition::Execnz;
|
||||||
case Opcode::S_AND_SAVEEXEC_B64:
|
case Opcode::S_AND_SAVEEXEC_B64:
|
||||||
case Opcode::S_ANDN2_B64:
|
case Opcode::S_ANDN2_B64:
|
||||||
case Opcode::V_CMPX_NE_U32:
|
|
||||||
return IR::Condition::Execnz;
|
return IR::Condition::Execnz;
|
||||||
default:
|
default:
|
||||||
return IR::Condition::True;
|
return IR::Condition::True;
|
||||||
|
@ -94,7 +98,8 @@ void CFG::EmitDivergenceLabels() {
|
||||||
// While this instruction does not save EXEC it is often used paired
|
// While this instruction does not save EXEC it is often used paired
|
||||||
// with SAVEEXEC to mask the threads that didn't pass the condition
|
// with SAVEEXEC to mask the threads that didn't pass the condition
|
||||||
// of initial branch.
|
// of initial branch.
|
||||||
inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32;
|
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
|
||||||
|
inst.opcode == Opcode::V_CMPX_NE_U32;
|
||||||
};
|
};
|
||||||
const auto is_close_scope = [](const GcnInst& inst) {
|
const auto is_close_scope = [](const GcnInst& inst) {
|
||||||
// Closing an EXEC scope can be either a branch instruction
|
// Closing an EXEC scope can be either a branch instruction
|
||||||
|
@ -104,7 +109,8 @@ void CFG::EmitDivergenceLabels() {
|
||||||
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
|
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
|
||||||
// Those instructions need to be wrapped in the condition as well so allow branch
|
// Those instructions need to be wrapped in the condition as well so allow branch
|
||||||
// as end scope instruction.
|
// as end scope instruction.
|
||||||
inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64;
|
inst.opcode == Opcode::S_CBRANCH_EXECZ ||
|
||||||
|
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Since we will be adding new labels, avoid iterating those as well.
|
// Since we will be adding new labels, avoid iterating those as well.
|
||||||
|
@ -171,7 +177,7 @@ void CFG::EmitBlocks() {
|
||||||
block->begin_index = GetIndex(start);
|
block->begin_index = GetIndex(start);
|
||||||
block->end_index = end_index;
|
block->end_index = end_index;
|
||||||
block->end_inst = end_inst;
|
block->end_inst = end_inst;
|
||||||
block->cond = MakeCondition(end_inst.opcode);
|
block->cond = MakeCondition(end_inst);
|
||||||
blocks.insert(*block);
|
blocks.insert(*block);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,4 +47,18 @@ bool GcnInst::IsConditionalBranch() const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool GcnInst::IsCmpx() const {
|
||||||
|
if ((opcode >= Opcode::V_CMPX_F_F32 && opcode <= Opcode::V_CMPX_T_F32) ||
|
||||||
|
(opcode >= Opcode::V_CMPX_F_F64 && opcode <= Opcode::V_CMPX_T_F64) ||
|
||||||
|
(opcode >= Opcode::V_CMPSX_F_F32 && opcode <= Opcode::V_CMPSX_T_F32) ||
|
||||||
|
(opcode >= Opcode::V_CMPSX_F_F64 && opcode <= Opcode::V_CMPSX_T_F64) ||
|
||||||
|
(opcode >= Opcode::V_CMPX_F_I32 && opcode <= Opcode::V_CMPX_CLASS_F32) ||
|
||||||
|
(opcode >= Opcode::V_CMPX_F_I64 && opcode <= Opcode::V_CMPX_CLASS_F64) ||
|
||||||
|
(opcode >= Opcode::V_CMPX_F_U32 && opcode <= Opcode::V_CMPX_T_U32) ||
|
||||||
|
(opcode >= Opcode::V_CMPX_F_U64 && opcode <= Opcode::V_CMPX_T_U64)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -203,6 +203,7 @@ struct GcnInst {
|
||||||
bool IsUnconditionalBranch() const;
|
bool IsUnconditionalBranch() const;
|
||||||
bool IsConditionalBranch() const;
|
bool IsConditionalBranch() const;
|
||||||
bool IsFork() const;
|
bool IsFork() const;
|
||||||
|
bool IsCmpx() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -280,6 +280,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||||
return V_CMP_U32(ConditionOp::GT, true, false, inst);
|
return V_CMP_U32(ConditionOp::GT, true, false, inst);
|
||||||
case Opcode::V_CMP_LT_I32:
|
case Opcode::V_CMP_LT_I32:
|
||||||
return V_CMP_U32(ConditionOp::LT, true, false, inst);
|
return V_CMP_U32(ConditionOp::LT, true, false, inst);
|
||||||
|
case Opcode::V_CMPX_GT_I32:
|
||||||
|
return V_CMP_U32(ConditionOp::GT, true, true, inst);
|
||||||
case Opcode::V_CMPX_LT_I32:
|
case Opcode::V_CMPX_LT_I32:
|
||||||
return V_CMP_U32(ConditionOp::LT, true, true, inst);
|
return V_CMP_U32(ConditionOp::LT, true, true, inst);
|
||||||
case Opcode::V_CMPX_F_U32:
|
case Opcode::V_CMPX_F_U32:
|
||||||
|
|
|
@ -246,10 +246,7 @@ public:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
// Samplers with different bindings might still be the same.
|
// Samplers with different bindings might still be the same.
|
||||||
const auto old_sharp =
|
return existing.GetSsharp(info) == desc.GetSsharp(info);
|
||||||
info.ReadUd<AmdGpu::Sampler>(existing.sgpr_base, existing.dword_offset);
|
|
||||||
const auto new_sharp = info.ReadUd<AmdGpu::Sampler>(desc.sgpr_base, desc.dword_offset);
|
|
||||||
return old_sharp == new_sharp;
|
|
||||||
})};
|
})};
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/types.h"
|
||||||
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
namespace AmdGpu {
|
||||||
|
|
||||||
|
// The following values are taken from fpPS4:
|
||||||
|
// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410
|
||||||
|
//
|
||||||
|
static constexpr std::array reg_array_default{
|
||||||
|
0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u,
|
||||||
|
0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u,
|
||||||
|
0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u,
|
||||||
|
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
|
||||||
|
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
|
||||||
|
0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u,
|
||||||
|
0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u,
|
||||||
|
0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
|
||||||
|
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
|
||||||
|
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
|
||||||
|
0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u,
|
||||||
|
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u,
|
||||||
|
0x2a00161au,
|
||||||
|
};
|
||||||
|
|
||||||
|
void Liverpool::Regs::SetDefaults() {
|
||||||
|
std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32));
|
||||||
|
|
||||||
|
std::memcpy(&reg_array[ContextRegWordOffset + 0x80], reg_array_default.data(),
|
||||||
|
reg_array_default.size() * sizeof(u32));
|
||||||
|
|
||||||
|
// Individual context regs values
|
||||||
|
reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x01b6] = 0x00000002u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u;
|
||||||
|
reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu;
|
||||||
|
reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace AmdGpu
|
|
@ -216,6 +216,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::ClearState: {
|
case PM4ItOpcode::ClearState: {
|
||||||
|
regs.SetDefaults();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::SetConfigReg: {
|
case PM4ItOpcode::SetConfigReg: {
|
||||||
|
|
|
@ -1017,6 +1017,8 @@ struct Liverpool {
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetDefaults();
|
||||||
};
|
};
|
||||||
|
|
||||||
Regs regs{};
|
Regs regs{};
|
||||||
|
|
|
@ -38,9 +38,9 @@ struct UniqueBuffer {
|
||||||
UniqueBuffer& operator=(const UniqueBuffer&) = delete;
|
UniqueBuffer& operator=(const UniqueBuffer&) = delete;
|
||||||
|
|
||||||
UniqueBuffer(UniqueBuffer&& other)
|
UniqueBuffer(UniqueBuffer&& other)
|
||||||
: buffer{std::exchange(other.buffer, VK_NULL_HANDLE)},
|
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
||||||
allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
|
||||||
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {}
|
buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {}
|
||||||
UniqueBuffer& operator=(UniqueBuffer&& other) {
|
UniqueBuffer& operator=(UniqueBuffer&& other) {
|
||||||
buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
|
buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
|
||||||
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
|
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
|
||||||
|
|
|
@ -274,6 +274,7 @@ bool Instance::CreateDevice() {
|
||||||
.independentBlend = features.independentBlend,
|
.independentBlend = features.independentBlend,
|
||||||
.geometryShader = features.geometryShader,
|
.geometryShader = features.geometryShader,
|
||||||
.logicOp = features.logicOp,
|
.logicOp = features.logicOp,
|
||||||
|
.depthBiasClamp = features.depthBiasClamp,
|
||||||
.multiViewport = features.multiViewport,
|
.multiViewport = features.multiViewport,
|
||||||
.samplerAnisotropy = features.samplerAnisotropy,
|
.samplerAnisotropy = features.samplerAnisotropy,
|
||||||
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
|
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
|
||||||
|
|
|
@ -138,8 +138,8 @@ void Rasterizer::BeginRendering() {
|
||||||
using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat;
|
using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat;
|
||||||
if (regs.depth_buffer.Address() != 0 &&
|
if (regs.depth_buffer.Address() != 0 &&
|
||||||
((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) ||
|
((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) ||
|
||||||
regs.depth_control.stencil_enable &&
|
(regs.depth_control.stencil_enable &&
|
||||||
regs.depth_buffer.stencil_info.format != StencilFormat::Invalid)) {
|
regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) {
|
||||||
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
||||||
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
||||||
texture_cache.IsMetaCleared(htile_address);
|
texture_cache.IsMetaCleared(htile_address);
|
||||||
|
|
|
@ -43,9 +43,9 @@ struct UniqueImage {
|
||||||
UniqueImage& operator=(const UniqueImage&) = delete;
|
UniqueImage& operator=(const UniqueImage&) = delete;
|
||||||
|
|
||||||
UniqueImage(UniqueImage&& other)
|
UniqueImage(UniqueImage&& other)
|
||||||
: image{std::exchange(other.image, VK_NULL_HANDLE)},
|
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
||||||
allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
|
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
|
||||||
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {}
|
image{std::exchange(other.image, VK_NULL_HANDLE)} {}
|
||||||
UniqueImage& operator=(UniqueImage&& other) {
|
UniqueImage& operator=(UniqueImage&& other) {
|
||||||
image = std::exchange(other.image, VK_NULL_HANDLE);
|
image = std::exchange(other.image, VK_NULL_HANDLE);
|
||||||
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
|
allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/texture_cache/image.h"
|
#include "video_core/texture_cache/image.h"
|
||||||
|
@ -110,7 +111,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
|
||||||
|
|
||||||
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
||||||
ImageId image_id_, std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
|
ImageId image_id_, std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
|
||||||
: info{info_}, image_id{image_id_} {
|
: image_id{image_id_}, info{info_} {
|
||||||
vk::ImageViewUsageCreateInfo usage_ci{};
|
vk::ImageViewUsageCreateInfo usage_ci{};
|
||||||
if (usage_override) {
|
if (usage_override) {
|
||||||
usage_ci.usage = usage_override.value();
|
usage_ci.usage = usage_override.value();
|
||||||
|
|
|
@ -342,12 +342,6 @@ TileManager::ScratchBuffer TileManager::AllocBuffer(u32 size, bool is_storage /*
|
||||||
.usage = usage,
|
.usage = usage,
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef __APPLE__
|
|
||||||
// Fix for detiler artifacts on macOS
|
|
||||||
const bool is_large_buffer = true;
|
|
||||||
#else
|
|
||||||
const bool is_large_buffer = size > 128_MB;
|
|
||||||
#endif
|
|
||||||
VmaAllocationCreateInfo alloc_info{
|
VmaAllocationCreateInfo alloc_info{
|
||||||
.flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
|
.flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
|
||||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
|
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
|
||||||
|
@ -462,7 +456,6 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
|
||||||
(m > 0 ? params.sizes[m - 1] : 0);
|
(m > 0 ? params.sizes[m - 1] : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto pitch = image.info.pitch;
|
|
||||||
cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params),
|
cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params),
|
||||||
&params);
|
&params);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue