shader_recompiler: Shared memory writes have side effects; fix format component order

This commit is contained in:
IndecisiveTurtle 2024-07-03 04:13:03 +03:00
parent 8103dde915
commit f212f43e18
6 changed files with 24 additions and 8 deletions

View File

@ -1962,7 +1962,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
if (Config::dumpPM4()) {
static auto last_frame_num = -1LL;
static u32 seq_num{};
if (last_frame_num == frames_submitted) {
if (last_frame_num == frames_submitted && cbpair == 0) {
++seq_num;
} else {
last_frame_num = frames_submitted;

View File

@ -149,9 +149,15 @@ void CFG::LinkBlocks() {
block.end_class = EndClass::Branch;
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
const auto& prev_inst = inst_list[block.end_index - 1];
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0 &&
prev_inst.control.exp.target != 9) {
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
if (prev_inst.control.exp.target != 9) {
block.end_class = EndClass::Kill;
} else if (const auto& exec_mask = inst_list[block.end_index - 2];
exec_mask.src[0].field == OperandField::ConstZero) {
block.end_class = EndClass::Kill;
} else {
block.end_class = EndClass::Exit;
}
} else {
block.end_class = EndClass::Exit;
}

View File

@ -40,6 +40,9 @@ Inst::~Inst() {
bool Inst::MayHaveSideEffects() const noexcept {
switch (op) {
case Opcode::Barrier:
case Opcode::WorkgroupMemoryBarrier:
case Opcode::DeviceMemoryBarrier:
case Opcode::ConditionRef:
case Opcode::Reference:
case Opcode::PhiMove:
@ -52,6 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferU32:
case Opcode::WriteSharedU128:
case Opcode::WriteSharedU64:
case Opcode::WriteSharedU32:
case Opcode::WriteSharedU16:
case Opcode::WriteSharedU8:
case Opcode::ImageWrite:
case Opcode::ImageAtomicIAdd32:
case Opcode::ImageAtomicSMin32:

View File

@ -291,11 +291,11 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8B8A8Unorm;
return vk::Format::eB8G8R8A8Unorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb;
return vk::Format::eB8G8R8A8Srgb;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {

View File

@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
@ -176,7 +176,9 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
const auto index_address = regs.index_base_address.Address<const void*>();
const u32 index_buffer_size = regs.num_indices * index_size;
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
static constexpr std::array<u16, 4> test{};
std::memcpy(data, index_address, index_buffer_size);
ASSERT(std::memcmp(data, test.data(), sizeof(test)) != 0);
vertex_index_buffer.Commit(index_buffer_size);
// Bind index buffer.

View File

@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
// scheduler.Wait(watch.tick);
scheduler.Wait(watch.tick);
++wait_cursor;
}
}