Merge pull request #253 from shadps4-emu/graphics/features_and_fixes

Graphics: missing features and fixes
This commit is contained in:
georgemoralis 2024-07-07 20:42:13 +03:00 committed by GitHub
commit 189033ae24
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 192 additions and 71 deletions

View File

@ -3,6 +3,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/config.h" #include "common/config.h"
#include "common/debug.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/path_util.h" #include "common/path_util.h"
#include "common/slot_vector.h" #include "common/slot_vector.h"
@ -264,6 +265,7 @@ static_assert(CtxInitSequence400.size() == 0x61);
// In case if `submitDone` is issued we need to block submissions until GPU idle // In case if `submitDone` is issued we need to block submissions until GPU idle
static u32 submission_lock{}; static u32 submission_lock{};
std::condition_variable cv_lock{};
static std::mutex m_submission{}; static std::mutex m_submission{};
static u64 frames_submitted{}; // frame counter static u64 frames_submitted{}; // frame counter
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
@ -277,6 +279,18 @@ struct AscQueueInfo {
static Common::SlotVector<AscQueueInfo> asc_queues{}; static Common::SlotVector<AscQueueInfo> asc_queues{};
static constexpr VAddr tessellation_factors_ring_addr = 0xFF0000000ULL; static constexpr VAddr tessellation_factors_ring_addr = 0xFF0000000ULL;
// Interrupt handler that clears the submission lock and wakes every thread blocked in
// WaitGpuIdle(). It is registered for Platform::InterruptId::GpuIdle in
// RegisterlibSceGnmDriver; the `irq` argument is not used.
static void ResetSubmissionLock(Platform::InterruptId irq) {
// Take m_submission so the reset is serialized with the predicate check in WaitGpuIdle()
std::unique_lock lock{m_submission};
submission_lock = 0;
cv_lock.notify_all();
}
// Blocks the calling thread until submission_lock is 0 (returns immediately if it already is).
static void WaitGpuIdle() {
// NOTE(review): HLE_TRACE is presumably a profiling/trace scope marker from common/debug.h — confirm
HLE_TRACE;
std::unique_lock lock{m_submission};
// wait() releases m_submission while blocked and re-acquires it before re-checking the predicate
cv_lock.wait(lock, [] { return submission_lock == 0; });
}
static void DumpCommandList(std::span<const u32> cmd_list, const std::string& postfix) { static void DumpCommandList(std::span<const u32> cmd_list, const std::string& postfix) {
using namespace Common::FS; using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::PM4Dir); const auto dump_dir = GetUserPath(PathType::PM4Dir);
@ -465,14 +479,9 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
return; return;
} }
std::unique_lock lock{m_submission}; WaitGpuIdle();
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
// Suspend logic goes here /* Suspend logic goes here */
submission_lock = 0;
}
auto vqid = gnm_vqid - 1; auto vqid = gnm_vqid - 1;
auto& asc_queue = asc_queues[{vqid}]; auto& asc_queue = asc_queues[{vqid}];
@ -863,9 +872,9 @@ int PS4_SYSV_ABI sceGnmEndWorkload() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmFindResourcesPublic() { s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_OK; return ORBIS_GNM_ERROR_FAILURE; // not available in retail FW
} }
void PS4_SYSV_ABI sceGnmFlushGarlic() { void PS4_SYSV_ABI sceGnmFlushGarlic() {
@ -1321,7 +1330,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id,
if (shader_id > 1) { if (shader_id > 1) {
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id); LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
return 0x8eee00ff; return ORBIS_GNM_ERROR_FAILURE;
} }
// clang-format off // clang-format off
@ -1391,7 +1400,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
if (shader_id != 0) { if (shader_id != 0) {
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id); LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
return 0x8eee00ff; return ORBIS_GNM_ERROR_FAILURE;
} }
// A fullscreen triangle with one uv set // A fullscreen triangle with one uv set
@ -1930,13 +1939,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
} }
} }
if (submission_lock != 0) { WaitGpuIdle();
liverpool->WaitGpuIdle();
// Suspend logic goes here /* Suspend logic goes here */
submission_lock = 0;
}
if (send_init_packet) { if (send_init_packet) {
if (sdk_version <= 0x1ffffffu) { if (sdk_version <= 0x1ffffffu) {
@ -1990,7 +1995,6 @@ int PS4_SYSV_ABI sceGnmSubmitDone() {
if (!liverpool->IsGpuIdle()) { if (!liverpool->IsGpuIdle()) {
submission_lock = true; submission_lock = true;
} }
liverpool->NotifySubmitDone();
send_init_packet = true; send_init_packet = true;
++frames_submitted; ++frames_submitted;
return ORBIS_OK; return ORBIS_OK;
@ -2471,6 +2475,9 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
sdk_version = 0; sdk_version = 0;
} }
Platform::IrqC::Instance()->Register(Platform::InterruptId::GpuIdle, ResetSubmissionLock,
nullptr);
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent); LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
sceGnmAreSubmitsAllowed); sceGnmAreSubmitsAllowed);

View File

@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceGnmDriverInternalVirtualQuery();
int PS4_SYSV_ABI sceGnmDriverTraceInProgress(); int PS4_SYSV_ABI sceGnmDriverTraceInProgress();
int PS4_SYSV_ABI sceGnmDriverTriggerCapture(); int PS4_SYSV_ABI sceGnmDriverTriggerCapture();
int PS4_SYSV_ABI sceGnmEndWorkload(); int PS4_SYSV_ABI sceGnmEndWorkload();
int PS4_SYSV_ABI sceGnmFindResourcesPublic(); s32 PS4_SYSV_ABI sceGnmFindResourcesPublic();
void PS4_SYSV_ABI sceGnmFlushGarlic(); void PS4_SYSV_ABI sceGnmFlushGarlic();
int PS4_SYSV_ABI sceGnmGetCoredumpAddress(); int PS4_SYSV_ABI sceGnmGetCoredumpAddress();
int PS4_SYSV_ABI sceGnmGetCoredumpMode(); int PS4_SYSV_ABI sceGnmGetCoredumpMode();

View File

@ -26,6 +26,7 @@ enum class InterruptId : u32 {
Compute6RelMem = 6u, Compute6RelMem = 6u,
GfxEop = 7u, GfxEop = 7u,
GfxFlip = 8u, GfxFlip = 8u,
GpuIdle = 9u,
}; };
using IrqHandler = std::function<void(InterruptId)>; using IrqHandler = std::function<void(InterruptId)>;

View File

@ -135,16 +135,34 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
if (IR::IsParam(attr)) { if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)}; const auto& param{ctx.input_params.at(index)};
if (param.buffer_handle < 0) {
if (!ValidId(param.id)) { if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written // Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
} }
if (param.num_components > 1) { if (param.num_components > 1) {
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; const Id pointer{
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer); return ctx.OpLoad(param.component_type, pointer);
} else { } else {
return ctx.OpLoad(param.component_type, param.id); return ctx.OpLoad(param.component_type, param.id);
} }
} else {
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
const auto step_rate = ctx.OpLoad(
ctx.U32[1],
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.instance_step_rates, rate_idx));
const auto offset = ctx.OpIAdd(
ctx.U32[1],
ctx.OpIMul(
ctx.U32[1],
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
}
} }
switch (attr) { switch (attr) {
case IR::Attribute::FragCoord: { case IR::Attribute::FragCoord: {

View File

@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineInputs(const Info& info) {
switch (stage) { switch (stage) {
case Stage::Vertex: case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(instance_step_rates, "step_rates");
interfaces.push_back(instance_step_rates);
for (const auto& input : info.vs_inputs) { for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)}; const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)}; if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
const u32 rate_idx =
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
: 1;
// Note that we pass index rather than Id
input_params[input.binding] = {
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
};
} else {
Id id{DefineInput(type, input.binding)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
Name(id, fmt::format("vs_instance_attr{}", input.binding));
} else {
Name(id, fmt::format("vs_in_attr{}", input.binding)); Name(id, fmt::format("vs_in_attr{}", input.binding));
}
input_params[input.binding] = GetAttributeInfo(input.fmt, id); input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id); interfaces.push_back(id);
} }
}
break; break;
}
case Stage::Fragment: case Stage::Fragment:
if (info.uses_group_quad) { if (info.uses_group_quad) {
subgroup_local_invocation_id = DefineVariable( subgroup_local_invocation_id = DefineVariable(
@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) {
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4); Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name = const auto name =
fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); buffer.is_instance_data
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
sizeof(float) * CHAR_BIT)
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
Name(struct_type, name); Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block); Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data"); MemberName(struct_type, 0, "data");
@ -317,6 +350,14 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rg32f; return spv::ImageFormat::Rg32f;
} }
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rg32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rgba32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 && if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R16f; return spv::ImageFormat::R16f;

View File

@ -165,6 +165,8 @@ public:
Id output_position{}; Id output_position{};
Id vertex_index{}; Id vertex_index{};
Id instance_id{};
Id instance_step_rates{};
Id base_vertex{}; Id base_vertex{};
Id frag_coord{}; Id frag_coord{};
Id front_facing{}; Id front_facing{};
@ -214,6 +216,7 @@ public:
Id pointer_type; Id pointer_type;
Id component_type; Id component_type;
u32 num_components; u32 num_components;
s32 buffer_handle{-1};
}; };
std::array<SpirvAttribute, 32> input_params{}; std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{}; std::array<SpirvAttribute, 32> output_params{};

View File

@ -235,9 +235,22 @@ void Translator::EmitFetch(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, comp); ir.SetVectorReg(dst_reg++, comp);
} }
if (attrib.instance_data == 2 || attrib.instance_data == 3) { // In case of programmable step rates we need to fallback to instance data pulling in
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}", // shader, so VBs should be bound as regular data buffers
attrib.instance_data); s32 instance_buf_handle = -1;
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.stride = buffer.GetStride(),
.num_records = buffer.num_records,
.used_types = IR::Type::F32,
.is_storage = true, // we may not fit into UBO with large meshes
.is_instance_data = true,
});
instance_buf_handle = s32(info.buffers.size() - 1);
} }
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
@ -247,7 +260,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
.num_components = std::min<u16>(attrib.num_elements, num_components), .num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base, .sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset, .dword_offset = attrib.dword_offset,
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data), .instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
}); });
} }
} }

View File

@ -165,13 +165,14 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
if (!flags.test(MimgModifier::Pcf)) { if (!flags.test(MimgModifier::Pcf)) {
return ir.ImageGather(handle, body, offset, {}, info); return ir.ImageGather(handle, body, offset, {}, info);
} }
ASSERT(mimg.dmask & 1); // should be always 1st (R) component
return ir.ImageGatherDref(handle, body, offset, {}, dref, info); return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
}(); }();
// For gather4 instructions dmask selects which component to read and must have
// only one bit set to 1
ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask");
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
continue;
}
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
ir.SetVectorReg(dest_reg++, value); ir.SetVectorReg(dest_reg++, value);
} }

View File

@ -77,7 +77,8 @@ struct BufferResource {
u32 num_records; u32 num_records;
IR::Type used_types; IR::Type used_types;
AmdGpu::Buffer inline_cbuf; AmdGpu::Buffer inline_cbuf;
bool is_storage; bool is_storage{false};
bool is_instance_data{false};
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept; constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
}; };
@ -116,6 +117,7 @@ struct Info {
u8 sgpr_base; u8 sgpr_base;
u8 dword_offset; u8 dword_offset;
InstanceIdType instance_step_rate; InstanceIdType instance_step_rate;
s32 instance_data_buf;
}; };
boost::container::static_vector<VsInput, 32> vs_inputs{}; boost::container::static_vector<VsInput, 32> vs_inputs{};

View File

@ -66,19 +66,8 @@ void Liverpool::Process(std::stop_token stoken) {
} }
} }
if (submit_done) { Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
std::scoped_lock lk{submit_mutex};
submit_cv.notify_all();
submit_done = false;
} }
}
}
void Liverpool::WaitGpuIdle() {
RENDERER_TRACE;
std::unique_lock lk{submit_mutex};
submit_cv.wait(lk, [this] { return num_submits == 0; });
} }
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) { Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {

View File

@ -887,7 +887,10 @@ struct Liverpool {
IndexBufferType index_buffer_type; IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2); INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
u32 enable_primitive_id; u32 enable_primitive_id;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1); INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
PolygonOffset poly_offset; PolygonOffset poly_offset;
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
AaConfig aa_config; AaConfig aa_config;
@ -937,18 +940,10 @@ public:
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb); void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
void SubmitAsc(u32 vqid, std::span<const u32> acb); void SubmitAsc(u32 vqid, std::span<const u32> acb);
void WaitGpuIdle();
bool IsGpuIdle() const { bool IsGpuIdle() const {
return num_submits == 0; return num_submits == 0;
} }
void NotifySubmitDone() {
std::scoped_lock lk{submit_mutex};
submit_done = true;
submit_cv.notify_all();
}
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_; rasterizer = rasterizer_;
} }
@ -1017,7 +1012,6 @@ private:
u32 num_submits{}; u32 num_submits{};
std::mutex submit_mutex; std::mutex submit_mutex;
std::condition_variable_any submit_cv; std::condition_variable_any submit_cv;
std::atomic<bool> submit_done{};
}; };
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
@ -1055,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);

View File

@ -321,6 +321,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) { if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc4UnormBlock; return vk::Format::eBc4UnormBlock;
} }
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc5UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Sint) { num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16B16A16Sint; return vk::Format::eR16G16B16A16Sint;
@ -366,6 +369,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) { if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8Unorm; return vk::Format::eR8G8Unorm;
} }
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR8G8Snorm;
}
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) { if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc7UnormBlock; return vk::Format::eBc7UnormBlock;
} }
@ -429,6 +435,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) { if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR16Unorm; return vk::Format::eR16Unorm;
} }
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR16G16B16A16Unorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
} }

View File

@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
stages[i] = *infos[i]; stages[i] = *infos[i];
} }
BuildDescSetLayout(); BuildDescSetLayout();
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.offset = 0,
.size = 2 * sizeof(u32),
};
const vk::DescriptorSetLayout set_layout = *desc_layout; const vk::DescriptorSetLayout set_layout = *desc_layout;
const vk::PipelineLayoutCreateInfo layout_info = { const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U, .setLayoutCount = 1U,
.pSetLayouts = &set_layout, .pSetLayouts = &set_layout,
.pushConstantRangeCount = 0, .pushConstantRangeCount = 1,
.pPushConstantRanges = nullptr, .pPushConstantRanges = &push_constants,
}; };
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes; boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0]; const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) { for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
// Skip attribute binding as the data will be pulled by shader
continue;
}
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset); const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({ attributes.push_back({
.location = input.binding, .location = input.binding,
@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
// Calculate buffers memory overlaps // Calculate buffers memory overlaps
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{}; boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
for (const auto& input : vs_info.vs_inputs) { for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue;
}
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset); const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (buffer.GetSize() == 0) { if (buffer.GetSize() == 0) {
continue; continue;

View File

@ -67,20 +67,24 @@ public:
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging, void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const; VideoCore::TextureCache& texture_cache) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept { vk::Pipeline Handle() const noexcept {
return *pipeline; return *pipeline;
} }
[[nodiscard]] bool IsEmbeddedVs() const noexcept { vk::PipelineLayout GetLayout() const {
return *pipeline_layout;
}
bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[0] == EmbeddedVsHash; return key.stage_hashes[0] == EmbeddedVsHash;
} }
[[nodiscard]] auto GetWriteMasks() const { auto GetWriteMasks() const {
return key.write_masks; return key.write_masks;
} }
[[nodiscard]] bool IsDepthEnabled() const { bool IsDepthEnabled() const {
return key.depth.depth_enable.Value(); return key.depth.depth_enable.Value();
} }

View File

@ -183,7 +183,7 @@ void PipelineCache::RefreshGraphicsKey() {
int remapped_cb{}; int remapped_cb{};
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb]; auto const& col_buf = regs.color_buffers[cb];
if (!col_buf || skip_cb_binding) { if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) {
continue; continue;
} }
const auto base_format = const auto base_format =

View File

@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
UpdateDynamicState(*pipeline); UpdateDynamicState(*pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
const u32 step_rates[] = {
regs.vgt_instance_step_rate_0,
regs.vgt_instance_step_rate_1,
};
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
sizeof(step_rates), &step_rates);
if (is_indexed) { if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else { } else {
@ -99,6 +106,12 @@ void Rasterizer::BeginRendering() {
continue; continue;
} }
// If the color buffer is still bound but rendering to it is disabled by the target mask,
// we need to prevent the render area from being affected by unbound render target extents.
if (!regs.color_target_mask.GetMask(col_buf_id)) {
continue;
}
const auto& hint = liverpool->last_cb_extent[col_buf_id]; const auto& hint = liverpool->last_cb_extent[col_buf_id];
const auto& image_view = texture_cache.RenderTarget(col_buf, hint); const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
const auto& image = texture_cache.GetImage(image_view.image_id); const auto& image = texture_cache.GetImage(image_view.image_id);

View File

@ -189,10 +189,14 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR32Uint: case vk::Format::eR32Uint:
return vk::Format::eR32Uint; return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock: case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eR32G32Sfloat: case vk::Format::eR32G32Sfloat:
return vk::Format::eR32G32Uint; return vk::Format::eR32G32Uint;
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc3SrgbBlock: case vk::Format::eBc3SrgbBlock:
case vk::Format::eBc3UnormBlock: case vk::Format::eBc3UnormBlock:
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock: case vk::Format::eBc7UnormBlock:
return vk::Format::eR32G32B32A32Uint; return vk::Format::eR32G32B32A32Uint;