shader_recompiler: Writelane elimination pass + null image fix

This commit is contained in:
IndecisiveTurtle 2024-08-23 01:22:41 +03:00
parent bce3a9c9e7
commit 4eda0452b5
20 changed files with 119 additions and 95 deletions

View File

@ -28,9 +28,13 @@ struct SlotId {
template <class T> template <class T>
class SlotVector { class SlotVector {
constexpr static std::size_t InitialCapacity = 1024; constexpr static std::size_t InitialCapacity = 2048;
public: public:
// Default constructor: calls Reserve(InitialCapacity) so the container starts
// out with InitialCapacity slots already reserved.
SlotVector() {
Reserve(InitialCapacity);
}
~SlotVector() noexcept { ~SlotVector() noexcept {
std::size_t index = 0; std::size_t index = 0;
for (u64 bits : stored_bitset) { for (u64 bits : stored_bitset) {
@ -67,19 +71,6 @@ public:
return SlotId{index}; return SlotId{index};
} }
// Relocates the object stored at `existing_id` into a newly obtained slot and
// constructs a fresh T from `args` in the slot `existing_id` referred to.
//
// Returns the SlotId of the new slot, i.e. the one that now holds the object
// that used to live at `existing_id`. `existing_id` itself stays valid and
// refers to the newly constructed object.
template <typename... Args>
[[nodiscard]] SlotId swap_and_insert(SlotId existing_id, Args&&... args) noexcept {
// Obtain the destination slot first; the reference below is only taken afterwards.
// NOTE(review): presumably because FreeValueIndex() may reallocate `values` - confirm.
const u32 index = FreeValueIndex();
T& existing_value = values[existing_id.index].object;
// Move-construct the old object into the new slot, then end its lifetime in the old slot.
new (&values[index].object) T(std::move(existing_value));
existing_value.~T();
// Build the replacement object in place where the old one used to be.
new (&values[existing_id.index].object) T(std::forward<Args>(args)...);
// Only the new slot needs its storage bit set here.
SetStorageBit(index);
return SlotId{index};
}
void erase(SlotId id) noexcept { void erase(SlotId id) noexcept {
values[id.index].object.~T(); values[id.index].object.~T();
free_list.push_back(id.index); free_list.push_back(id.index);
@ -116,9 +107,9 @@ private:
} }
void ValidateIndex([[maybe_unused]] SlotId id) const noexcept { void ValidateIndex([[maybe_unused]] SlotId id) const noexcept {
DEBUG_ASSERT(id); ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); ASSERT(id.index / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
} }
[[nodiscard]] u32 FreeValueIndex() noexcept { [[nodiscard]] u32 FreeValueIndex() noexcept {
@ -151,7 +142,8 @@ private:
const std::size_t old_free_size = free_list.size(); const std::size_t old_free_size = free_list.size();
free_list.resize(old_free_size + (new_capacity - values_capacity)); free_list.resize(old_free_size + (new_capacity - values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(), const std::size_t new_free_size = free_list.size();
std::iota(free_list.rbegin(), free_list.rbegin() + new_free_size - old_free_size,
static_cast<u32>(values_capacity)); static_cast<u32>(values_capacity));
delete[] values; delete[] values;

View File

@ -1123,7 +1123,6 @@ int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** res) {
} }
int PS4_SYSV_ABI scePthreadDetach(ScePthread thread) { int PS4_SYSV_ABI scePthreadDetach(ScePthread thread) {
LOG_INFO(Kernel_Pthread, "thread create name = {}", thread->name);
thread->is_detached = true; thread->is_detached = true;
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
@ -117,7 +117,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value(); const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
ImageOperands operands; ImageOperands operands;
operands.AddOffset(ctx, offset); operands.AddOffset(ctx, offset, true);
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask, return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
operands.operands); operands.operands);
} }
@ -129,7 +129,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
ImageOperands operands; ImageOperands operands;
operands.AddOffset(ctx, offset); operands.AddOffset(ctx, offset, true);
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask, return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask,
operands.operands); operands.operands);
} }

View File

@ -407,5 +407,8 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
Id EmitLaneId(EmitContext& ctx); Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx); Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
Id EmitReadFirstLane(EmitContext& ctx, Id value);
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -22,4 +22,16 @@ Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index) {
return ctx.OpGroupNonUniformQuadBroadcast(ctx.U32[1], SubgroupScope(ctx), value, index); return ctx.OpGroupNonUniformQuadBroadcast(ctx.U32[1], SubgroupScope(ctx), value, index);
} }
// SPIR-V emitter entry for ReadFirstLane. No lowering is implemented: calling
// this hits UNREACHABLE(), and neither `ctx` nor `value` is used.
Id EmitReadFirstLane(EmitContext& ctx, Id value) {
UNREACHABLE();
}
// SPIR-V emitter entry for ReadLane. No lowering is implemented: calling this
// hits UNREACHABLE(), and `ctx`, `value` and `lane` are all unused.
// NOTE(review): presumably every ReadLane is expected to be folded away before
// code generation (see FoldReadLane) - confirm.
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
UNREACHABLE();
}
// SPIR-V emitter entry for WriteLane. Placeholder: ignores `value`,
// `write_value` and `lane` and always yields ctx.u32_zero_value.
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
return ctx.u32_zero_value;
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -329,7 +329,7 @@ void EmitContext::DefinePushDataBlock() {
void EmitContext::DefineBuffers() { void EmitContext::DefineBuffers() {
boost::container::small_vector<Id, 8> type_ids; boost::container::small_vector<Id, 8> type_ids;
for (u32 i = 0; const auto& buffer : info.buffers) { for (u32 i = 0; const auto& buffer : info.buffers) {
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; const auto* data_types = &F32;/*True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;*/
const Id data_type = (*data_types)[1]; const Id data_type = (*data_types)[1];
const Id record_array_type{buffer.is_storage const Id record_array_type{buffer.is_storage
? TypeRuntimeArray(data_type) ? TypeRuntimeArray(data_type)

View File

@ -127,7 +127,6 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
if (rtn) { if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val}); SetDst(inst.dst[0], IR::U32{original_val});
@ -139,7 +138,6 @@ void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false); const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
if (rtn) { if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val}); SetDst(inst.dst[0], IR::U32{original_val});
@ -151,7 +149,6 @@ void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false); const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
if (rtn) { if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val}); SetDst(inst.dst[0], IR::U32{original_val});
@ -168,13 +165,18 @@ void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
} }
void Translator::V_READLANE_B32(const GcnInst& inst) { void Translator::V_READLANE_B32(const GcnInst& inst) {
ASSERT(info.stage != Stage::Compute); const IR::ScalarReg dst{inst.dst[0].code};
SetDst(inst.dst[0], GetSrc(inst.src[0])); const IR::U32 value{GetSrc(inst.src[0])};
const IR::U32 lane{GetSrc(inst.src[1])};
ir.SetScalarReg(dst, ir.ReadLane(value, lane));
} }
void Translator::V_WRITELANE_B32(const GcnInst& inst) { void Translator::V_WRITELANE_B32(const GcnInst& inst) {
ASSERT(info.stage != Stage::Compute); const IR::VectorReg dst{inst.dst[0].code};
SetDst(inst.dst[0], GetSrc(inst.src[0])); const IR::U32 value{GetSrc(inst.src[0])};
const IR::U32 lane{GetSrc(inst.src[1])};
const IR::U32 old_value{GetSrc(inst.dst[0])};
ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
} }
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn { namespace Shader::Gcn {
@ -440,13 +440,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
// This only really exists to let resource tracking pass know // This only really exists to let resource tracking pass know
// there is an inline cbuf. // there is an inline cbuf.
SetDst(inst.dst[0], ir.Imm32(pc)); const IR::ScalarReg dst{inst.dst[0].code};
ir.SetScalarReg(dst, ir.Imm32(pc));
ir.SetScalarReg(dst + 1, ir.Imm32(0));
} }
void Translator::S_ADDC_U32(const GcnInst& inst) { void Translator::S_ADDC_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo())); const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))};
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
} }
void Translator::S_MAX_U32(const GcnInst& inst) { void Translator::S_MAX_U32(const GcnInst& inst) {

View File

@ -209,10 +209,6 @@ U1 IREmitter::GetVcc() {
return Inst<U1>(Opcode::GetVcc); return Inst<U1>(Opcode::GetVcc);
} }
// Emits a GetSccLo instruction (no operands) and returns its U32 result.
U32 IREmitter::GetSccLo() {
return Inst<U32>(Opcode::GetSccLo);
}
U32 IREmitter::GetVccLo() { U32 IREmitter::GetVccLo() {
return Inst<U32>(Opcode::GetVccLo); return Inst<U32>(Opcode::GetVccLo);
} }
@ -445,6 +441,18 @@ U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
return Inst<U32>(Opcode::QuadShuffle, value, index); return Inst<U32>(Opcode::QuadShuffle, value, index);
} }
// Emits a ReadFirstLane instruction with `value` as its only operand and
// returns the instruction's U32 result.
U32 IREmitter::ReadFirstLane(const U32& value) {
return Inst<U32>(Opcode::ReadFirstLane, value);
}
// Emits a ReadLane instruction and returns its U32 result.
// Operand order: Arg(0) = `value`, Arg(1) = `lane`.
U32 IREmitter::ReadLane(const U32& value, const U32& lane) {
return Inst<U32>(Opcode::ReadLane, value, lane);
}
// Emits a WriteLane instruction and returns its U32 result.
// Operand order: Arg(0) = `value` (the previous register contents),
// Arg(1) = `write_value`, Arg(2) = `lane`.
U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& lane) {
return Inst<U32>(Opcode::WriteLane, value, write_value, lane);
}
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
if (a.Type() != b.Type()) { if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());

View File

@ -65,7 +65,6 @@ public:
[[nodiscard]] U1 GetScc(); [[nodiscard]] U1 GetScc();
[[nodiscard]] U1 GetExec(); [[nodiscard]] U1 GetExec();
[[nodiscard]] U1 GetVcc(); [[nodiscard]] U1 GetVcc();
[[nodiscard]] U32 GetSccLo();
[[nodiscard]] U32 GetVccLo(); [[nodiscard]] U32 GetVccLo();
[[nodiscard]] U32 GetVccHi(); [[nodiscard]] U32 GetVccHi();
void SetScc(const U1& value); void SetScc(const U1& value);
@ -122,6 +121,9 @@ public:
[[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId(); [[nodiscard]] U32 WarpId();
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
[[nodiscard]] U32 ReadFirstLane(const U32& value);
[[nodiscard]] U32 ReadLane(const U32& value, const U32& lane);
[[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);

View File

@ -58,7 +58,6 @@ OPCODE(SetAttribute, Void, Attr
OPCODE(GetScc, U1, Void, ) OPCODE(GetScc, U1, Void, )
OPCODE(GetExec, U1, Void, ) OPCODE(GetExec, U1, Void, )
OPCODE(GetVcc, U1, Void, ) OPCODE(GetVcc, U1, Void, )
OPCODE(GetSccLo, U32, Void, )
OPCODE(GetVccLo, U32, Void, ) OPCODE(GetVccLo, U32, Void, )
OPCODE(GetVccHi, U32, Void, ) OPCODE(GetVccHi, U32, Void, )
OPCODE(SetScc, Void, U1, ) OPCODE(SetScc, Void, U1, )
@ -330,19 +329,22 @@ OPCODE(ImageRead, U32x4, Opaq
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
// Image atomic operations // Image atomic operations
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Warp operations // Warp operations
OPCODE(LaneId, U32, ) OPCODE(LaneId, U32, )
OPCODE(WarpId, U32, ) OPCODE(WarpId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 ) OPCODE(QuadShuffle, U32, U32, U32 )
// ReadFirstLane has a single U32 operand, matching IREmitter::ReadFirstLane and EmitReadFirstLane.
OPCODE(ReadFirstLane, U32, U32, )
OPCODE(ReadLane, U32, U32, U32 )
OPCODE(WriteLane, U32, U32, U32, U32 )

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include <bit> #include <bit>
#include <optional> #include <optional>
#include <type_traits> #include <type_traits>
@ -250,6 +250,18 @@ void FoldCmpClass(IR::Inst& inst) {
} }
} }
// Forwards a ReadLane to the value stored by an earlier WriteLane to the same lane.
//
// Starting from the ReadLane's source operand (Arg(0)), walks backwards through the
// chain of WriteLane producers. If one of them wrote to the lane being read, every
// use of `inst` is replaced with the value that WriteLane stored (its Arg(1)).
//
// The fold is skipped, leaving `inst` untouched, when:
//  - the lane being read is not an immediate, so it cannot be compared;
//  - the chain reaches an immediate or any producer that is not a WriteLane;
//  - a WriteLane in the chain targets a non-immediate lane. Such a write may or may
//    not hit the lane being read, so nothing older than it can be forwarded safely.
void FoldReadLane(IR::Inst& inst) {
    const IR::Value lane_arg = inst.Arg(1);
    if (!lane_arg.IsImmediate()) {
        return;
    }
    const u32 lane = lane_arg.U32();

    IR::Value source = inst.Arg(0);
    // An immediate has no producing instruction to inspect, so stop there.
    while (!source.IsImmediate()) {
        IR::Inst* const prod = source.InstRecursive();
        if (prod->GetOpcode() != IR::Opcode::WriteLane) {
            return;
        }
        const IR::Value written_lane = prod->Arg(2);
        if (!written_lane.IsImmediate()) {
            // Unknown target lane: this write could alias the lane being read.
            return;
        }
        if (written_lane.U32() == lane) {
            inst.ReplaceUsesWith(prod->Arg(1));
            return;
        }
        // Different constant lane: keep looking at the value this write was layered on.
        source = prod->Arg(0);
    }
}
void ConstantPropagation(IR::Block& block, IR::Inst& inst) { void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::IAdd32: case IR::Opcode::IAdd32:
@ -289,6 +301,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::SelectF32: case IR::Opcode::SelectF32:
case IR::Opcode::SelectF64: case IR::Opcode::SelectF64:
return FoldSelect(inst); return FoldSelect(inst);
case IR::Opcode::ReadLane:
return FoldReadLane(inst);
case IR::Opcode::FPNeg32: case IR::Opcode::FPNeg32:
FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
return; return;

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include <algorithm> #include <algorithm>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
@ -345,6 +345,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
// Retrieve SGPR pair that holds sbase // Retrieve SGPR pair that holds sbase
const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> { const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
ASSERT(inst->GetOpcode() != IR::Opcode::ReadConst);
if (inst->GetOpcode() == IR::Opcode::GetUserData) { if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst->Arg(0).ScalarReg(); return inst->Arg(0).ScalarReg();
} }
@ -402,24 +403,13 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
// is used to define an inline constant buffer // is used to define an inline constant buffer
IR::Inst* handle = inst.Arg(0).InstRecursive(); IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* p0 = handle->Arg(0).InstRecursive(); if (!handle->AreAllArgsImmediates()) {
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
!p0->Arg(1).IsImmediate()) {
return -1;
}
IR::Inst* p1 = handle->Arg(1).InstRecursive();
if (p1->GetOpcode() != IR::Opcode::IAdd32) {
return -1;
}
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
return -1; return -1;
} }
// We have found this pattern. Build the sharp. // We have found this pattern. Build the sharp.
std::array<u32, 4> buffer; std::array<u64, 2> buffer;
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32(); buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
buffer[1] = 0; buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32;
buffer[2] = handle->Arg(2).U32();
buffer[3] = handle->Arg(3).U32();
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer); cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
// Assign a binding to this sharp. // Assign a binding to this sharp.
return descriptors.Add(BufferResource{ return descriptors.Add(BufferResource{
@ -617,7 +607,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
const IR::Value arg = inst.Arg(arg_pos); const IR::Value arg = inst.Arg(arg_pos);
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
const auto read = [&](u32 offset) -> auto { const auto read = [&](u32 offset) -> IR::U32 {
if (arg.IsImmediate()) {
const u16 comp = (arg.U32() >> offset) & 0x3F;
return ir.Imm32(s32(comp << 26) >> 26);
}
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true); return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
}; };

View File

@ -32,7 +32,6 @@ struct SccFlagTag : FlagTag {};
struct ExecFlagTag : FlagTag {}; struct ExecFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {}; struct VccFlagTag : FlagTag {};
struct VccLoTag : FlagTag {}; struct VccLoTag : FlagTag {};
// Empty tag type derived from FlagTag; selects the SccLo overloads (Def/SetDef/UndefOpcode).
struct SccLoTag : FlagTag {};
struct VccHiTag : FlagTag {}; struct VccHiTag : FlagTag {};
struct GotoVariable : FlagTag { struct GotoVariable : FlagTag {
@ -45,7 +44,7 @@ struct GotoVariable : FlagTag {
}; };
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag, using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
VccFlagTag, SccLoTag, VccLoTag, VccHiTag>; VccFlagTag, VccLoTag, VccHiTag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>; using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable { struct DefTable {
@ -84,13 +83,6 @@ struct DefTable {
exec_flag.insert_or_assign(block, value); exec_flag.insert_or_assign(block, value);
} }
// Returns the value recorded for `block` in scc_lo_flag.
// NOTE(review): operator[] presumably default-inserts an entry when `block` is
// absent (if scc_lo_flag is a ValueMap) - confirm.
const IR::Value& Def(IR::Block* block, SccLoTag) {
return scc_lo_flag[block];
}
// Records `value` as the SccLo definition for `block`, overwriting any previous entry.
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
scc_lo_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, VccLoTag) { const IR::Value& Def(IR::Block* block, VccLoTag) {
return vcc_lo_flag[block]; return vcc_lo_flag[block];
} }
@ -133,10 +125,6 @@ IR::Opcode UndefOpcode(const VccLoTag) noexcept {
return IR::Opcode::UndefU32; return IR::Opcode::UndefU32;
} }
// Opcode used to represent an undefined SccLo value: UndefU32.
IR::Opcode UndefOpcode(const SccLoTag) noexcept {
return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(const VccHiTag) noexcept { IR::Opcode UndefOpcode(const VccHiTag) noexcept {
return IR::Opcode::UndefU32; return IR::Opcode::UndefU32;
} }
@ -336,9 +324,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::SetVcc: case IR::Opcode::SetVcc:
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0)); pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
break; break;
case IR::Opcode::SetSccLo:
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetVccLo: case IR::Opcode::SetVccLo:
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0)); pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
break; break;
@ -371,9 +356,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetVcc: case IR::Opcode::GetVcc:
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block)); inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
break; break;
case IR::Opcode::GetSccLo:
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
break;
case IR::Opcode::GetVccLo: case IR::Opcode::GetVccLo:
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block)); inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
break; break;

View File

@ -56,15 +56,15 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
// Run optimization passes // Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
if (program.info.stage != Stage::Compute) { if (program.info.stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program); Shader::Optimization::LowerSharedMemToRegisters(program);
} }
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::CollectShaderInfoPass(program); Shader::Optimization::CollectShaderInfoPass(program);
LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program)); LOG_INFO(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
return program; return program;
} }

View File

@ -280,9 +280,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
DumpShader(code, hash, stage, "bin"); DumpShader(code, hash, stage, "bin");
} }
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) { if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) {
LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage); LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage);
return {}; return {};

View File

@ -117,6 +117,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
: instance{&instance_}, scheduler{&scheduler_}, info{info_}, : instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
cpu_addr_end{cpu_addr + info.guest_size_bytes} { cpu_addr_end{cpu_addr + info.guest_size_bytes} {
LOG_INFO(Render_Vulkan, "Creating image {}x{} at {:#x} M:{}", info.size.width, info.size.height,
info.guest_address, info.resources.levels);
mip_hashes.resize(info.resources.levels); mip_hashes.resize(info.resources.levels);
ASSERT(info.pixel_format != vk::Format::eUndefined); ASSERT(info.pixel_format != vk::Format::eUndefined);
// Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case

View File

@ -219,7 +219,12 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
guest_address = image.Address(); guest_address = image.Address();
mips_layout.reserve(resources.levels); mips_layout.reserve(resources.levels);
tiling_idx = image.tiling_index;
UpdateSize();
}
void ImageInfo::UpdateSize() {
mips_layout.clear();
MipInfo mip_info{}; MipInfo mip_info{};
guest_size_bytes = 0; guest_size_bytes = 0;
for (auto mip = 0u; mip < resources.levels; ++mip) { for (auto mip = 0u; mip < resources.levels; ++mip) {
@ -265,7 +270,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
ASSERT(!props.is_block); ASSERT(!props.is_block);
ASSERT(num_samples == 1); ASSERT(num_samples == 1);
std::tie(mip_info.pitch, mip_info.size) = std::tie(mip_info.pitch, mip_info.size) =
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx);
break; break;
} }
default: { default: {

View File

@ -29,6 +29,8 @@ struct ImageInfo {
bool IsPacked() const; bool IsPacked() const;
bool IsDepthStencil() const; bool IsDepthStencil() const;
void UpdateSize();
struct { struct {
VAddr cmask_addr; VAddr cmask_addr;
VAddr fmask_addr; VAddr fmask_addr;
@ -69,6 +71,7 @@ struct ImageInfo {
boost::container::small_vector<MipInfo, 14> mips_layout; boost::container::small_vector<MipInfo, 14> mips_layout;
VAddr guest_address{0}; VAddr guest_address{0};
u32 guest_size_bytes{0}; u32 guest_size_bytes{0};
u32 tiling_idx{0}; // TODO: merge with existing!
}; };
} // namespace VideoCore } // namespace VideoCore

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include <xxhash.h> #include <xxhash.h>
#include "common/assert.h" #include "common/assert.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
@ -18,11 +18,15 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
BufferCache& buffer_cache_, PageManager& tracker_) BufferCache& buffer_cache_, PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
tile_manager{instance, scheduler} { tile_manager{instance, scheduler} {
ImageInfo info; ImageInfo info{};
info.pixel_format = vk::Format::eR8G8B8A8Unorm; info.pixel_format = vk::Format::eR8G8B8A8Unorm;
info.type = vk::ImageType::e2D; info.type = vk::ImageType::e2D;
info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled);
info.num_bits = 32;
info.UpdateSize();
const ImageId null_id = slot_images.insert(instance, scheduler, info); const ImageId null_id = slot_images.insert(instance, scheduler, info);
ASSERT(null_id.index == 0); ASSERT(null_id.index == 0);
slot_images[null_id].flags = ImageFlagBits{};
ImageViewInfo view_info; ImageViewInfo view_info;
void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id)); void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id));