Merge pull request #253 from shadps4-emu/graphics/features_and_fixes
Graphics: missing features and fixes
This commit is contained in:
commit
189033ae24
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
#include "common/slot_vector.h"
|
#include "common/slot_vector.h"
|
||||||
|
@ -264,6 +265,7 @@ static_assert(CtxInitSequence400.size() == 0x61);
|
||||||
|
|
||||||
// If `submitDone` has been issued, we need to block submissions until the GPU is idle
|
// If `submitDone` has been issued, we need to block submissions until the GPU is idle
|
||||||
static u32 submission_lock{};
|
static u32 submission_lock{};
|
||||||
|
// Notified once `submission_lock` has been cleared; waiters hold `m_submission` while waiting.
// Kept `static` (internal linkage) like the other submission-state globals in this file.
static std::condition_variable cv_lock{};
|
||||||
static std::mutex m_submission{};
|
static std::mutex m_submission{};
|
||||||
static u64 frames_submitted{}; // frame counter
|
static u64 frames_submitted{}; // frame counter
|
||||||
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
|
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
|
||||||
|
@ -277,6 +279,18 @@ struct AscQueueInfo {
|
||||||
static Common::SlotVector<AscQueueInfo> asc_queues{};
|
static Common::SlotVector<AscQueueInfo> asc_queues{};
|
||||||
static constexpr VAddr tessellation_factors_ring_addr = 0xFF0000000ULL;
|
static constexpr VAddr tessellation_factors_ring_addr = 0xFF0000000ULL;
|
||||||
|
|
||||||
|
// Interrupt handler that re-opens command submission.
// Registered for `Platform::InterruptId::GpuIdle` in RegisterlibSceGnmDriver.
// `irq` is required by the handler signature but is not used here.
static void ResetSubmissionLock(Platform::InterruptId irq) {
|
||||||
|
// Take the same mutex the waiters use so the flag update cannot race with their predicate check.
std::unique_lock lock{m_submission};
|
||||||
|
// 0 means "submissions allowed"; WaitGpuIdle() waits for exactly this value.
submission_lock = 0;
|
||||||
|
// Wake every thread currently blocked in WaitGpuIdle().
cv_lock.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Blocks the calling thread until `submission_lock` is 0.
// Returns immediately if it is already 0, because the predicate is evaluated before waiting.
// Called before queuing new work in sceGnmDingDong and sceGnmSubmitCommandBuffers.
static void WaitGpuIdle() {
|
||||||
|
// NOTE(review): tracing/profiling scope macro, presumably from common/debug.h — confirm.
HLE_TRACE;
|
||||||
|
// The condition variable requires the mutex to be held; it is released while sleeping.
std::unique_lock lock{m_submission};
|
||||||
|
// The predicate form re-checks the flag after every wakeup, so spurious wakeups are harmless.
cv_lock.wait(lock, [] { return submission_lock == 0; });
|
||||||
|
}
|
||||||
|
|
||||||
static void DumpCommandList(std::span<const u32> cmd_list, const std::string& postfix) {
|
static void DumpCommandList(std::span<const u32> cmd_list, const std::string& postfix) {
|
||||||
using namespace Common::FS;
|
using namespace Common::FS;
|
||||||
const auto dump_dir = GetUserPath(PathType::PM4Dir);
|
const auto dump_dir = GetUserPath(PathType::PM4Dir);
|
||||||
|
@ -465,14 +479,9 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_lock lock{m_submission};
|
WaitGpuIdle();
|
||||||
if (submission_lock != 0) {
|
|
||||||
liverpool->WaitGpuIdle();
|
|
||||||
|
|
||||||
// Suspend logic goes here
|
/* Suspend logic goes here */
|
||||||
|
|
||||||
submission_lock = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto vqid = gnm_vqid - 1;
|
auto vqid = gnm_vqid - 1;
|
||||||
auto& asc_queue = asc_queues[{vqid}];
|
auto& asc_queue = asc_queues[{vqid}];
|
||||||
|
@ -863,9 +872,9 @@ int PS4_SYSV_ABI sceGnmEndWorkload() {
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmFindResourcesPublic() {
|
s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
return ORBIS_OK;
|
return ORBIS_GNM_ERROR_FAILURE; // not available in retail FW
|
||||||
}
|
}
|
||||||
|
|
||||||
void PS4_SYSV_ABI sceGnmFlushGarlic() {
|
void PS4_SYSV_ABI sceGnmFlushGarlic() {
|
||||||
|
@ -1321,7 +1330,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
|
|
||||||
if (shader_id > 1) {
|
if (shader_id > 1) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
|
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
|
||||||
return 0x8eee00ff;
|
return ORBIS_GNM_ERROR_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
@ -1391,7 +1400,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
|
|
||||||
if (shader_id != 0) {
|
if (shader_id != 0) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
|
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
|
||||||
return 0x8eee00ff;
|
return ORBIS_GNM_ERROR_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// A fullscreen triangle with one uv set
|
// A fullscreen triangle with one uv set
|
||||||
|
@ -1930,13 +1939,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (submission_lock != 0) {
|
WaitGpuIdle();
|
||||||
liverpool->WaitGpuIdle();
|
|
||||||
|
|
||||||
// Suspend logic goes here
|
/* Suspend logic goes here */
|
||||||
|
|
||||||
submission_lock = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (send_init_packet) {
|
if (send_init_packet) {
|
||||||
if (sdk_version <= 0x1ffffffu) {
|
if (sdk_version <= 0x1ffffffu) {
|
||||||
|
@ -1990,7 +1995,6 @@ int PS4_SYSV_ABI sceGnmSubmitDone() {
|
||||||
if (!liverpool->IsGpuIdle()) {
|
if (!liverpool->IsGpuIdle()) {
|
||||||
submission_lock = true;
|
submission_lock = true;
|
||||||
}
|
}
|
||||||
liverpool->NotifySubmitDone();
|
|
||||||
send_init_packet = true;
|
send_init_packet = true;
|
||||||
++frames_submitted;
|
++frames_submitted;
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
|
@ -2471,6 +2475,9 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
|
||||||
sdk_version = 0;
|
sdk_version = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Platform::IrqC::Instance()->Register(Platform::InterruptId::GpuIdle, ResetSubmissionLock,
|
||||||
|
nullptr);
|
||||||
|
|
||||||
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
|
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
|
||||||
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
|
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
|
||||||
sceGnmAreSubmitsAllowed);
|
sceGnmAreSubmitsAllowed);
|
||||||
|
|
|
@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceGnmDriverInternalVirtualQuery();
|
||||||
int PS4_SYSV_ABI sceGnmDriverTraceInProgress();
|
int PS4_SYSV_ABI sceGnmDriverTraceInProgress();
|
||||||
int PS4_SYSV_ABI sceGnmDriverTriggerCapture();
|
int PS4_SYSV_ABI sceGnmDriverTriggerCapture();
|
||||||
int PS4_SYSV_ABI sceGnmEndWorkload();
|
int PS4_SYSV_ABI sceGnmEndWorkload();
|
||||||
int PS4_SYSV_ABI sceGnmFindResourcesPublic();
|
s32 PS4_SYSV_ABI sceGnmFindResourcesPublic();
|
||||||
void PS4_SYSV_ABI sceGnmFlushGarlic();
|
void PS4_SYSV_ABI sceGnmFlushGarlic();
|
||||||
int PS4_SYSV_ABI sceGnmGetCoredumpAddress();
|
int PS4_SYSV_ABI sceGnmGetCoredumpAddress();
|
||||||
int PS4_SYSV_ABI sceGnmGetCoredumpMode();
|
int PS4_SYSV_ABI sceGnmGetCoredumpMode();
|
||||||
|
|
|
@ -26,6 +26,7 @@ enum class InterruptId : u32 {
|
||||||
Compute6RelMem = 6u,
|
Compute6RelMem = 6u,
|
||||||
GfxEop = 7u,
|
GfxEop = 7u,
|
||||||
GfxFlip = 8u,
|
GfxFlip = 8u,
|
||||||
|
GpuIdle = 9u,
|
||||||
};
|
};
|
||||||
|
|
||||||
using IrqHandler = std::function<void(InterruptId)>;
|
using IrqHandler = std::function<void(InterruptId)>;
|
||||||
|
|
|
@ -135,16 +135,34 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
const auto& param{ctx.input_params.at(index)};
|
const auto& param{ctx.input_params.at(index)};
|
||||||
|
if (param.buffer_handle < 0) {
|
||||||
if (!ValidId(param.id)) {
|
if (!ValidId(param.id)) {
|
||||||
// Attribute is disabled or varying component is not written
|
// Attribute is disabled or varying component is not written
|
||||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (param.num_components > 1) {
|
if (param.num_components > 1) {
|
||||||
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
const Id pointer{
|
||||||
|
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||||
return ctx.OpLoad(param.component_type, pointer);
|
return ctx.OpLoad(param.component_type, pointer);
|
||||||
} else {
|
} else {
|
||||||
return ctx.OpLoad(param.component_type, param.id);
|
return ctx.OpLoad(param.component_type, param.id);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
|
||||||
|
const auto step_rate = ctx.OpLoad(
|
||||||
|
ctx.U32[1],
|
||||||
|
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
||||||
|
ctx.instance_step_rates, rate_idx));
|
||||||
|
const auto offset = ctx.OpIAdd(
|
||||||
|
ctx.U32[1],
|
||||||
|
ctx.OpIMul(
|
||||||
|
ctx.U32[1],
|
||||||
|
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
|
||||||
|
ctx.ConstU32(param.num_components)),
|
||||||
|
ctx.ConstU32(comp));
|
||||||
|
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::FragCoord: {
|
case IR::Attribute::FragCoord: {
|
||||||
|
|
|
@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
||||||
|
|
||||||
void EmitContext::DefineInputs(const Info& info) {
|
void EmitContext::DefineInputs(const Info& info) {
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case Stage::Vertex:
|
case Stage::Vertex: {
|
||||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||||
|
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||||
|
|
||||||
|
// Create push constants block for instance steps rates
|
||||||
|
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
|
||||||
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
|
MemberName(struct_type, 0, "sr0");
|
||||||
|
MemberName(struct_type, 1, "sr1");
|
||||||
|
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||||
|
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||||
|
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||||
|
Name(instance_step_rates, "step_rates");
|
||||||
|
interfaces.push_back(instance_step_rates);
|
||||||
|
|
||||||
for (const auto& input : info.vs_inputs) {
|
for (const auto& input : info.vs_inputs) {
|
||||||
const Id type{GetAttributeType(*this, input.fmt)};
|
const Id type{GetAttributeType(*this, input.fmt)};
|
||||||
const Id id{DefineInput(type, input.binding)};
|
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||||
|
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||||
|
|
||||||
|
const u32 rate_idx =
|
||||||
|
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
|
||||||
|
: 1;
|
||||||
|
// Note that we pass index rather than Id
|
||||||
|
input_params[input.binding] = {
|
||||||
|
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
Id id{DefineInput(type, input.binding)};
|
||||||
|
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
|
||||||
|
Name(id, fmt::format("vs_instance_attr{}", input.binding));
|
||||||
|
} else {
|
||||||
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
||||||
|
}
|
||||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case Stage::Fragment:
|
case Stage::Fragment:
|
||||||
if (info.uses_group_quad) {
|
if (info.uses_group_quad) {
|
||||||
subgroup_local_invocation_id = DefineVariable(
|
subgroup_local_invocation_id = DefineVariable(
|
||||||
|
@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) {
|
||||||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
||||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||||
const auto name =
|
const auto name =
|
||||||
fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
buffer.is_instance_data
|
||||||
|
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
|
||||||
|
sizeof(float) * CHAR_BIT)
|
||||||
|
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||||
Name(struct_type, name);
|
Name(struct_type, name);
|
||||||
Decorate(struct_type, spv::Decoration::Block);
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
MemberName(struct_type, 0, "data");
|
MemberName(struct_type, 0, "data");
|
||||||
|
@ -317,6 +350,14 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
||||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||||
return spv::ImageFormat::Rg32f;
|
return spv::ImageFormat::Rg32f;
|
||||||
}
|
}
|
||||||
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
|
||||||
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||||
|
return spv::ImageFormat::Rg32ui;
|
||||||
|
}
|
||||||
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
|
||||||
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||||
|
return spv::ImageFormat::Rgba32ui;
|
||||||
|
}
|
||||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
|
||||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||||
return spv::ImageFormat::R16f;
|
return spv::ImageFormat::R16f;
|
||||||
|
|
|
@ -165,6 +165,8 @@ public:
|
||||||
|
|
||||||
Id output_position{};
|
Id output_position{};
|
||||||
Id vertex_index{};
|
Id vertex_index{};
|
||||||
|
Id instance_id{};
|
||||||
|
Id instance_step_rates{};
|
||||||
Id base_vertex{};
|
Id base_vertex{};
|
||||||
Id frag_coord{};
|
Id frag_coord{};
|
||||||
Id front_facing{};
|
Id front_facing{};
|
||||||
|
@ -214,6 +216,7 @@ public:
|
||||||
Id pointer_type;
|
Id pointer_type;
|
||||||
Id component_type;
|
Id component_type;
|
||||||
u32 num_components;
|
u32 num_components;
|
||||||
|
s32 buffer_handle{-1};
|
||||||
};
|
};
|
||||||
std::array<SpirvAttribute, 32> input_params{};
|
std::array<SpirvAttribute, 32> input_params{};
|
||||||
std::array<SpirvAttribute, 32> output_params{};
|
std::array<SpirvAttribute, 32> output_params{};
|
||||||
|
|
|
@ -235,9 +235,22 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
||||||
ir.SetVectorReg(dst_reg++, comp);
|
ir.SetVectorReg(dst_reg++, comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attrib.instance_data == 2 || attrib.instance_data == 3) {
|
// In case of programmable step rates we need to fallback to instance data pulling in
|
||||||
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
|
// shader, so VBs should be bound as regular data buffers
|
||||||
attrib.instance_data);
|
s32 instance_buf_handle = -1;
|
||||||
|
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
|
||||||
|
if (step_rate == Info::VsInput::OverStepRate0 ||
|
||||||
|
step_rate == Info::VsInput::OverStepRate1) {
|
||||||
|
info.buffers.push_back({
|
||||||
|
.sgpr_base = attrib.sgpr_base,
|
||||||
|
.dword_offset = attrib.dword_offset,
|
||||||
|
.stride = buffer.GetStride(),
|
||||||
|
.num_records = buffer.num_records,
|
||||||
|
.used_types = IR::Type::F32,
|
||||||
|
.is_storage = true, // we may not fit into UBO with large meshes
|
||||||
|
.is_instance_data = true,
|
||||||
|
});
|
||||||
|
instance_buf_handle = s32(info.buffers.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
||||||
|
@ -247,7 +260,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
||||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||||
.sgpr_base = attrib.sgpr_base,
|
.sgpr_base = attrib.sgpr_base,
|
||||||
.dword_offset = attrib.dword_offset,
|
.dword_offset = attrib.dword_offset,
|
||||||
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
|
.instance_step_rate = step_rate,
|
||||||
|
.instance_data_buf = instance_buf_handle,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -165,13 +165,14 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
||||||
if (!flags.test(MimgModifier::Pcf)) {
|
if (!flags.test(MimgModifier::Pcf)) {
|
||||||
return ir.ImageGather(handle, body, offset, {}, info);
|
return ir.ImageGather(handle, body, offset, {}, info);
|
||||||
}
|
}
|
||||||
|
ASSERT(mimg.dmask & 1); // should be always 1st (R) component
|
||||||
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
|
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
|
||||||
}();
|
}();
|
||||||
|
|
||||||
|
// For gather4 instructions dmask selects which component to read and must have
|
||||||
|
// only one bit set to 1
|
||||||
|
ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask");
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
if (((mimg.dmask >> i) & 1) == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
|
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
|
||||||
ir.SetVectorReg(dest_reg++, value);
|
ir.SetVectorReg(dest_reg++, value);
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,7 +77,8 @@ struct BufferResource {
|
||||||
u32 num_records;
|
u32 num_records;
|
||||||
IR::Type used_types;
|
IR::Type used_types;
|
||||||
AmdGpu::Buffer inline_cbuf;
|
AmdGpu::Buffer inline_cbuf;
|
||||||
bool is_storage;
|
bool is_storage{false};
|
||||||
|
bool is_instance_data{false};
|
||||||
|
|
||||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||||
};
|
};
|
||||||
|
@ -116,6 +117,7 @@ struct Info {
|
||||||
u8 sgpr_base;
|
u8 sgpr_base;
|
||||||
u8 dword_offset;
|
u8 dword_offset;
|
||||||
InstanceIdType instance_step_rate;
|
InstanceIdType instance_step_rate;
|
||||||
|
s32 instance_data_buf;
|
||||||
};
|
};
|
||||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||||
|
|
||||||
|
|
|
@ -66,19 +66,8 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (submit_done) {
|
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
|
||||||
std::scoped_lock lk{submit_mutex};
|
|
||||||
submit_cv.notify_all();
|
|
||||||
submit_done = false;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Liverpool::WaitGpuIdle() {
|
|
||||||
RENDERER_TRACE;
|
|
||||||
|
|
||||||
std::unique_lock lk{submit_mutex};
|
|
||||||
submit_cv.wait(lk, [this] { return num_submits == 0; });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||||
|
|
|
@ -887,7 +887,10 @@ struct Liverpool {
|
||||||
IndexBufferType index_buffer_type;
|
IndexBufferType index_buffer_type;
|
||||||
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
||||||
u32 enable_primitive_id;
|
u32 enable_primitive_id;
|
||||||
INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1);
|
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
|
||||||
|
u32 vgt_instance_step_rate_0;
|
||||||
|
u32 vgt_instance_step_rate_1;
|
||||||
|
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
|
||||||
PolygonOffset poly_offset;
|
PolygonOffset poly_offset;
|
||||||
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
|
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
|
||||||
AaConfig aa_config;
|
AaConfig aa_config;
|
||||||
|
@ -937,18 +940,10 @@ public:
|
||||||
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||||
void SubmitAsc(u32 vqid, std::span<const u32> acb);
|
void SubmitAsc(u32 vqid, std::span<const u32> acb);
|
||||||
|
|
||||||
void WaitGpuIdle();
|
|
||||||
|
|
||||||
bool IsGpuIdle() const {
|
bool IsGpuIdle() const {
|
||||||
return num_submits == 0;
|
return num_submits == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void NotifySubmitDone() {
|
|
||||||
std::scoped_lock lk{submit_mutex};
|
|
||||||
submit_done = true;
|
|
||||||
submit_cv.notify_all();
|
|
||||||
}
|
|
||||||
|
|
||||||
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
|
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
|
||||||
rasterizer = rasterizer_;
|
rasterizer = rasterizer_;
|
||||||
}
|
}
|
||||||
|
@ -1017,7 +1012,6 @@ private:
|
||||||
u32 num_submits{};
|
u32 num_submits{};
|
||||||
std::mutex submit_mutex;
|
std::mutex submit_mutex;
|
||||||
std::condition_variable_any submit_cv;
|
std::condition_variable_any submit_cv;
|
||||||
std::atomic<bool> submit_done{};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||||
|
@ -1055,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||||
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
||||||
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
||||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
|
||||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||||
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
||||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
||||||
|
|
|
@ -321,6 +321,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eBc4UnormBlock;
|
return vk::Format::eBc4UnormBlock;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
|
return vk::Format::eBc5UnormBlock;
|
||||||
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||||
num_format == AmdGpu::NumberFormat::Sint) {
|
num_format == AmdGpu::NumberFormat::Sint) {
|
||||||
return vk::Format::eR16G16B16A16Sint;
|
return vk::Format::eR16G16B16A16Sint;
|
||||||
|
@ -366,6 +369,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eR8G8Unorm;
|
return vk::Format::eR8G8Unorm;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Snorm) {
|
||||||
|
return vk::Format::eR8G8Snorm;
|
||||||
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eBc7UnormBlock;
|
return vk::Format::eBc7UnormBlock;
|
||||||
}
|
}
|
||||||
|
@ -429,6 +435,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eR16Unorm;
|
return vk::Format::eR16Unorm;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
|
return vk::Format::eR16G16B16A16Unorm;
|
||||||
|
}
|
||||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
stages[i] = *infos[i];
|
stages[i] = *infos[i];
|
||||||
}
|
}
|
||||||
BuildDescSetLayout();
|
BuildDescSetLayout();
|
||||||
|
|
||||||
|
const vk::PushConstantRange push_constants = {
|
||||||
|
.stageFlags = vk::ShaderStageFlagBits::eVertex,
|
||||||
|
.offset = 0,
|
||||||
|
.size = 2 * sizeof(u32),
|
||||||
|
};
|
||||||
|
|
||||||
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
||||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||||
.setLayoutCount = 1U,
|
.setLayoutCount = 1U,
|
||||||
.pSetLayouts = &set_layout,
|
.pSetLayouts = &set_layout,
|
||||||
.pushConstantRangeCount = 0,
|
.pushConstantRangeCount = 1,
|
||||||
.pPushConstantRanges = nullptr,
|
.pPushConstantRanges = &push_constants,
|
||||||
};
|
};
|
||||||
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||||
|
|
||||||
|
@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
|
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
|
||||||
const auto& vs_info = stages[0];
|
const auto& vs_info = stages[0];
|
||||||
for (const auto& input : vs_info.vs_inputs) {
|
for (const auto& input : vs_info.vs_inputs) {
|
||||||
|
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||||
|
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||||
|
// Skip attribute binding as the data will be pulled by shader
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||||
attributes.push_back({
|
attributes.push_back({
|
||||||
.location = input.binding,
|
.location = input.binding,
|
||||||
|
@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
|
||||||
// Calculate buffers memory overlaps
|
// Calculate buffers memory overlaps
|
||||||
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
|
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
|
||||||
for (const auto& input : vs_info.vs_inputs) {
|
for (const auto& input : vs_info.vs_inputs) {
|
||||||
|
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||||
|
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||||
if (buffer.GetSize() == 0) {
|
if (buffer.GetSize() == 0) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -67,20 +67,24 @@ public:
|
||||||
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||||
VideoCore::TextureCache& texture_cache) const;
|
VideoCore::TextureCache& texture_cache) const;
|
||||||
|
|
||||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
vk::Pipeline Handle() const noexcept {
|
||||||
return *pipeline;
|
return *pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
|
vk::PipelineLayout GetLayout() const {
|
||||||
|
return *pipeline_layout;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsEmbeddedVs() const noexcept {
|
||||||
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||||
return key.stage_hashes[0] == EmbeddedVsHash;
|
return key.stage_hashes[0] == EmbeddedVsHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] auto GetWriteMasks() const {
|
auto GetWriteMasks() const {
|
||||||
return key.write_masks;
|
return key.write_masks;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsDepthEnabled() const {
|
bool IsDepthEnabled() const {
|
||||||
return key.depth.depth_enable.Value();
|
return key.depth.depth_enable.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -183,7 +183,7 @@ void PipelineCache::RefreshGraphicsKey() {
|
||||||
int remapped_cb{};
|
int remapped_cb{};
|
||||||
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||||
auto const& col_buf = regs.color_buffers[cb];
|
auto const& col_buf = regs.color_buffers[cb];
|
||||||
if (!col_buf || skip_cb_binding) {
|
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto base_format =
|
const auto base_format =
|
||||||
|
|
|
@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||||
UpdateDynamicState(*pipeline);
|
UpdateDynamicState(*pipeline);
|
||||||
|
|
||||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||||
|
|
||||||
|
const u32 step_rates[] = {
|
||||||
|
regs.vgt_instance_step_rate_0,
|
||||||
|
regs.vgt_instance_step_rate_1,
|
||||||
|
};
|
||||||
|
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
|
||||||
|
sizeof(step_rates), &step_rates);
|
||||||
if (is_indexed) {
|
if (is_indexed) {
|
||||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -99,6 +106,12 @@ void Rasterizer::BeginRendering() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the color buffer is still bound but rendering to it is disabled by the target mask,
|
||||||
|
// we need to prevent the render area from being affected by unbound render target extents.
|
||||||
|
if (!regs.color_target_mask.GetMask(col_buf_id)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
||||||
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
|
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
|
||||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||||
|
|
|
@ -189,10 +189,14 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
case vk::Format::eR32Uint:
|
case vk::Format::eR32Uint:
|
||||||
return vk::Format::eR32Uint;
|
return vk::Format::eR32Uint;
|
||||||
case vk::Format::eBc1RgbaUnormBlock:
|
case vk::Format::eBc1RgbaUnormBlock:
|
||||||
|
case vk::Format::eBc4UnormBlock:
|
||||||
case vk::Format::eR32G32Sfloat:
|
case vk::Format::eR32G32Sfloat:
|
||||||
return vk::Format::eR32G32Uint;
|
return vk::Format::eR32G32Uint;
|
||||||
|
case vk::Format::eBc2SrgbBlock:
|
||||||
|
case vk::Format::eBc2UnormBlock:
|
||||||
case vk::Format::eBc3SrgbBlock:
|
case vk::Format::eBc3SrgbBlock:
|
||||||
case vk::Format::eBc3UnormBlock:
|
case vk::Format::eBc3UnormBlock:
|
||||||
|
case vk::Format::eBc5UnormBlock:
|
||||||
case vk::Format::eBc7SrgbBlock:
|
case vk::Format::eBc7SrgbBlock:
|
||||||
case vk::Format::eBc7UnormBlock:
|
case vk::Format::eBc7UnormBlock:
|
||||||
return vk::Format::eR32G32B32A32Uint;
|
return vk::Format::eR32G32B32A32Uint;
|
||||||
|
|
Loading…
Reference in New Issue