From 4eda0452b5b1ffb983fa6eaa8e716594811a1f79 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 23 Aug 2024 01:22:41 +0300 Subject: [PATCH] shader_recompiler: Writelane elimination pass + null image fix --- src/common/slot_vector.h | 28 +++++++------------ .../libraries/kernel/thread_management.cpp | 1 - .../backend/spirv/emit_spirv_image.cpp | 6 ++-- .../backend/spirv/emit_spirv_instructions.h | 3 ++ .../backend/spirv/emit_spirv_warp.cpp | 12 ++++++++ .../backend/spirv/spirv_emit_context.cpp | 2 +- .../frontend/translate/data_share.cpp | 16 ++++++----- .../frontend/translate/scalar_alu.cpp | 9 ++++-- src/shader_recompiler/ir/ir_emitter.cpp | 16 ++++++++--- src/shader_recompiler/ir/ir_emitter.h | 4 ++- src/shader_recompiler/ir/opcodes.inc | 26 +++++++++-------- .../ir/passes/constant_propogation_pass.cpp | 16 ++++++++++- .../ir/passes/resource_tracking_pass.cpp | 28 ++++++++----------- .../ir/passes/ssa_rewrite_pass.cpp | 20 +------------ src/shader_recompiler/recompiler.cpp | 4 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 -- src/video_core/texture_cache/image.cpp | 2 ++ src/video_core/texture_cache/image_info.cpp | 7 ++++- src/video_core/texture_cache/image_info.h | 3 ++ .../texture_cache/texture_cache.cpp | 8 ++++-- 20 files changed, 119 insertions(+), 95 deletions(-) diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h index f0982e29..2158d4d6 100644 --- a/src/common/slot_vector.h +++ b/src/common/slot_vector.h @@ -28,9 +28,13 @@ struct SlotId { template class SlotVector { - constexpr static std::size_t InitialCapacity = 1024; + constexpr static std::size_t InitialCapacity = 2048; public: + SlotVector() { + Reserve(InitialCapacity); + } + ~SlotVector() noexcept { std::size_t index = 0; for (u64 bits : stored_bitset) { @@ -67,19 +71,6 @@ public: return SlotId{index}; } - template - [[nodiscard]] SlotId swap_and_insert(SlotId existing_id, Args&&... args) noexcept { - const u32 index = FreeValueIndex(); - T& existing_value = values[existing_id.index].object; - - new (&values[index].object) T(std::move(existing_value)); - existing_value.~T(); - new (&values[existing_id.index].object) T(std::forward(args)...); - SetStorageBit(index); - - return SlotId{index}; - } - void erase(SlotId id) noexcept { values[id.index].object.~T(); free_list.push_back(id.index); @@ -116,9 +107,9 @@ private: } void ValidateIndex([[maybe_unused]] SlotId id) const noexcept { - DEBUG_ASSERT(id); - DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); - DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + ASSERT(id); + ASSERT(id.index / 64 < stored_bitset.size()); + ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); } [[nodiscard]] u32 FreeValueIndex() noexcept { @@ -151,7 +142,8 @@ private: const std::size_t old_free_size = free_list.size(); free_list.resize(old_free_size + (new_capacity - values_capacity)); - std::iota(free_list.begin() + old_free_size, free_list.end(), + const std::size_t new_free_size = free_list.size(); + std::iota(free_list.rbegin(), free_list.rbegin() + new_free_size - old_free_size, static_cast(values_capacity)); delete[] values; diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 68953269..567fff18 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -1123,7 +1123,6 @@ int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** res) { } int PS4_SYSV_ABI scePthreadDetach(ScePthread thread) { - LOG_INFO(Kernel_Pthread, "thread create name = {}", thread->name); thread->is_detached = true; return ORBIS_OK; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 5526e541..600ffa71 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" @@ -117,7 +117,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const u32 comp = inst->Flags().gather_comp.Value(); ImageOperands operands; - operands.AddOffset(ctx, offset); + operands.AddOffset(ctx, offset, true); return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask, operands.operands); } @@ -129,7 +129,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.AddOffset(ctx, offset); + operands.AddOffset(ctx, offset, true); return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask, operands.operands); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index bc39bc0f..0f883c24 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -407,5 +407,8 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co Id EmitLaneId(EmitContext& ctx); Id EmitWarpId(EmitContext& ctx); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); +Id EmitReadFirstLane(EmitContext& ctx, Id value); +Id EmitReadLane(EmitContext& ctx, Id value, u32 lane); +Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 38afd90f..c55763c5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -22,4 +22,16 @@ Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index) { return ctx.OpGroupNonUniformQuadBroadcast(ctx.U32[1], SubgroupScope(ctx), value, index); } +Id EmitReadFirstLane(EmitContext& ctx, Id value) { + UNREACHABLE(); +} + +Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) { + UNREACHABLE(); +} + +Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) { + return ctx.u32_zero_value; +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index d61e108f..55e12436 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -329,7 +329,7 @@ void EmitContext::DefinePushDataBlock() { void EmitContext::DefineBuffers() { boost::container::small_vector type_ids; for (u32 i = 0; const auto& buffer : info.buffers) { - const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; + const auto* data_types = &F32;/*True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;*/ const Id data_type = (*data_types)[1]; const Id record_array_type{buffer.is_storage ? TypeRuntimeArray(data_type) diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index b7b5aa13..7580f744 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -127,7 +127,6 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); - IR::VectorReg dst_reg{inst.dst[0].code}; const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); if (rtn) { SetDst(inst.dst[0], IR::U32{original_val}); @@ -139,7 +138,6 @@ void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) { const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); - IR::VectorReg dst_reg{inst.dst[0].code}; const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false); if (rtn) { SetDst(inst.dst[0], IR::U32{original_val}); @@ -151,7 +149,6 @@ void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) { const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); - IR::VectorReg dst_reg{inst.dst[0].code}; const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false); if (rtn) { SetDst(inst.dst[0], IR::U32{original_val}); @@ -168,13 +165,18 @@ void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { } void Translator::V_READLANE_B32(const GcnInst& inst) { - ASSERT(info.stage != Stage::Compute); - SetDst(inst.dst[0], GetSrc(inst.src[0])); + const IR::ScalarReg dst{inst.dst[0].code}; + const IR::U32 value{GetSrc(inst.src[0])}; + const IR::U32 lane{GetSrc(inst.src[1])}; + ir.SetScalarReg(dst, ir.ReadLane(value, lane)); } void Translator::V_WRITELANE_B32(const GcnInst& inst) { - ASSERT(info.stage != Stage::Compute); - SetDst(inst.dst[0], GetSrc(inst.src[0])); + const IR::VectorReg dst{inst.dst[0].code}; + const IR::U32 value{GetSrc(inst.src[0])}; + const IR::U32 lane{GetSrc(inst.src[1])}; + const IR::U32 old_value{GetSrc(inst.dst[0])}; + ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane)); } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 812d93ba..2ee80ff9 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -440,13 +440,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) { void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { // This only really exists to let resource tracking pass know // there is an inline cbuf. - SetDst(inst.dst[0], ir.Imm32(pc)); + const IR::ScalarReg dst{inst.dst[0].code}; + ir.SetScalarReg(dst, ir.Imm32(pc)); + ir.SetScalarReg(dst + 1, ir.Imm32(0)); } void Translator::S_ADDC_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo())); + const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))}; + SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry)); } void Translator::S_MAX_U32(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 3ae06807..0f2fb2f7 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -209,10 +209,6 @@ U1 IREmitter::GetVcc() { return Inst(Opcode::GetVcc); } -U32 IREmitter::GetSccLo() { - return Inst(Opcode::GetSccLo); -} - U32 IREmitter::GetVccLo() { return Inst(Opcode::GetVccLo); } @@ -445,6 +441,18 @@ U32 IREmitter::QuadShuffle(const U32& value, const U32& index) { return Inst(Opcode::QuadShuffle, value, index); } +U32 IREmitter::ReadFirstLane(const U32& value) { + return Inst(Opcode::ReadFirstLane, value); +} + +U32 IREmitter::ReadLane(const U32& value, const U32& lane) { + return Inst(Opcode::ReadLane, value, lane); +} + +U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& lane) { + return Inst(Opcode::WriteLane, value, write_value, lane); +} + F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { if (a.Type() != b.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index be7f2515..45fa5f21 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -65,7 +65,6 @@ public: [[nodiscard]] U1 GetScc(); [[nodiscard]] U1 GetExec(); [[nodiscard]] U1 GetVcc(); - [[nodiscard]] U32 GetSccLo(); [[nodiscard]] U32 GetVccLo(); [[nodiscard]] U32 GetVccHi(); void SetScc(const U1& value); @@ -122,6 +121,9 @@ public: [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 WarpId(); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); + [[nodiscard]] U32 ReadFirstLane(const U32& value); + [[nodiscard]] U32 ReadLane(const U32& value, const U32& lane); + [[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index e9ecd435..9be89f64 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -58,7 +58,6 @@ OPCODE(SetAttribute, Void, Attr OPCODE(GetScc, U1, Void, ) OPCODE(GetExec, U1, Void, ) OPCODE(GetVcc, U1, Void, ) -OPCODE(GetSccLo, U32, Void, ) OPCODE(GetVccLo, U32, Void, ) OPCODE(GetVccHi, U32, Void, ) OPCODE(SetScc, Void, U1, ) @@ -330,19 +329,22 @@ OPCODE(ImageRead, U32x4, Opaq OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) // Image atomic operations -OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) -OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) // Warp operations OPCODE(LaneId, U32, ) OPCODE(WarpId, U32, ) OPCODE(QuadShuffle, U32, U32, U32 ) +OPCODE(ReadFirstLane, U32, U32, U32 ) +OPCODE(ReadLane, U32, U32, U32 ) +OPCODE(WriteLane, U32, U32, U32, U32 ) diff --git a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp index 94218b32..97c24cba 100644 --- a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include #include #include @@ -250,6 +250,18 @@ void FoldCmpClass(IR::Inst& inst) { } } +void FoldReadLane(IR::Inst& inst) { + const u32 lane = inst.Arg(1).U32(); + IR::Inst* prod = inst.Arg(0).InstRecursive(); + while (prod->GetOpcode() == IR::Opcode::WriteLane) { + if (prod->Arg(2).U32() == lane) { + inst.ReplaceUsesWith(prod->Arg(1)); + return; + } + prod = prod->Arg(0).InstRecursive(); + } +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::IAdd32: @@ -289,6 +301,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); + case IR::Opcode::ReadLane: + return FoldReadLane(inst); case IR::Opcode::FPNeg32: FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); return; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 20a66ad0..b313c729 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include #include #include "shader_recompiler/ir/basic_block.h" @@ -345,6 +345,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) { // Retrieve SGPR pair that holds sbase const auto pred1 = [](const IR::Inst* inst) -> std::optional { + ASSERT(inst->GetOpcode() != IR::Opcode::ReadConst); if (inst->GetOpcode() == IR::Opcode::GetUserData) { return inst->Arg(0).ScalarReg(); } @@ -402,24 +403,13 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, // is used to define an inline constant buffer IR::Inst* handle = inst.Arg(0).InstRecursive(); - IR::Inst* p0 = handle->Arg(0).InstRecursive(); - if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() || - !p0->Arg(1).IsImmediate()) { - return -1; - } - IR::Inst* p1 = handle->Arg(1).InstRecursive(); - if (p1->GetOpcode() != IR::Opcode::IAdd32) { - return -1; - } - if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) { + if (!handle->AreAllArgsImmediates()) { return -1; } // We have found this pattern. Build the sharp. - std::array buffer; - buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32(); - buffer[1] = 0; - buffer[2] = handle->Arg(2).U32(); - buffer[3] = handle->Arg(3).U32(); + std::array buffer; + buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); + buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32; cbuf = std::bit_cast(buffer); // Assign a binding to this sharp. return descriptors.Add(BufferResource{ @@ -617,7 +607,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip const IR::Value arg = inst.Arg(arg_pos); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); - const auto read = [&](u32 offset) -> auto { + const auto read = [&](u32 offset) -> IR::U32 { + if (arg.IsImmediate()) { + const u16 comp = (arg.U32() >> offset) & 0x3F; + return ir.Imm32(s32(comp << 26) >> 26); + } return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true); }; diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index eef73a65..9edb157d 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -32,7 +32,6 @@ struct SccFlagTag : FlagTag {}; struct ExecFlagTag : FlagTag {}; struct VccFlagTag : FlagTag {}; struct VccLoTag : FlagTag {}; -struct SccLoTag : FlagTag {}; struct VccHiTag : FlagTag {}; struct GotoVariable : FlagTag { @@ -45,7 +44,7 @@ struct GotoVariable : FlagTag { }; using Variant = std::variant; + VccFlagTag, VccLoTag, VccHiTag>; using ValueMap = std::unordered_map; struct DefTable { @@ -84,13 +83,6 @@ struct DefTable { exec_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, SccLoTag) { - return scc_lo_flag[block]; - } - void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) { - scc_lo_flag.insert_or_assign(block, value); - } - const IR::Value& Def(IR::Block* block, VccLoTag) { return vcc_lo_flag[block]; } @@ -133,10 +125,6 @@ IR::Opcode UndefOpcode(const VccLoTag) noexcept { return IR::Opcode::UndefU32; } -IR::Opcode UndefOpcode(const SccLoTag) noexcept { - return IR::Opcode::UndefU32; -} - IR::Opcode UndefOpcode(const VccHiTag) noexcept { return IR::Opcode::UndefU32; } @@ -336,9 +324,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetVcc: pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0)); break; - case IR::Opcode::SetSccLo: - pass.WriteVariable(SccLoTag{}, block, inst.Arg(0)); - break; case IR::Opcode::SetVccLo: pass.WriteVariable(VccLoTag{}, block, inst.Arg(0)); break; @@ -371,9 +356,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetVcc: inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block)); break; - case IR::Opcode::GetSccLo: - inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block)); - break; case IR::Opcode::GetVccLo: inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block)); break; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 0f9fd6d4..5c589da6 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -56,15 +56,15 @@ IR::Program TranslateProgram(Common::ObjectPool& inst_pool, // Run optimization passes Shader::Optimization::SsaRewritePass(program.post_order_blocks); - Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); if (program.info.stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } + Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); - LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program)); + LOG_INFO(Render_Vulkan, "{}", Shader::IR::DumpProgram(program)); return program; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 55f04bac..139edcf7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -280,9 +280,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { DumpShader(code, hash, stage, "bin"); } - block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) { LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage); return {}; diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 528dda55..f7ab3712 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -117,6 +117,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { + LOG_INFO(Render_Vulkan, "Creating image {}x{} at {:#x} M:{}", info.size.width, info.size.height, + info.guest_address, info.resources.levels); mip_hashes.resize(info.resources.levels); ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 17b78a6d..a073d046 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -219,7 +219,12 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { guest_address = image.Address(); mips_layout.reserve(resources.levels); + tiling_idx = image.tiling_index; + UpdateSize(); +} +void ImageInfo::UpdateSize() { + mips_layout.clear(); MipInfo mip_info{}; guest_size_bytes = 0; for (auto mip = 0u; mip < resources.levels; ++mip) { @@ -265,7 +270,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { ASSERT(!props.is_block); ASSERT(num_samples == 1); std::tie(mip_info.pitch, mip_info.size) = - ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); + ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx); break; } default: { diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 9dad0dd6..ddad318d 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -29,6 +29,8 @@ struct ImageInfo { bool IsPacked() const; bool IsDepthStencil() const; + void UpdateSize(); + struct { VAddr cmask_addr; VAddr fmask_addr; @@ -69,6 +71,7 @@ struct ImageInfo { boost::container::small_vector mips_layout; VAddr guest_address{0}; u32 guest_size_bytes{0}; + u32 tiling_idx{0}; // TODO: merge with existing! }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 6bc893b0..7c57f6ef 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include #include "common/assert.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -18,11 +18,15 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, tile_manager{instance, scheduler} { - ImageInfo info; + ImageInfo info{}; info.pixel_format = vk::Format::eR8G8B8A8Unorm; info.type = vk::ImageType::e2D; + info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled); + info.num_bits = 32; + info.UpdateSize(); const ImageId null_id = slot_images.insert(instance, scheduler, info); ASSERT(null_id.index == 0); + slot_images[null_id].flags = ImageFlagBits{}; ImageViewInfo view_info; void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id));