From b4e1eebcddcd349c7ee26b320234fe6886c5eab7 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 2 Jul 2024 04:27:44 +0300 Subject: [PATCH] shader_recompiler: Fix some atomic bugs --- .../libraries/kernel/thread_management.cpp | 37 +++++++++---------- src/core/libraries/system/msgdialog.cpp | 5 +++ .../backend/spirv/emit_spirv.cpp | 25 +++++++------ .../backend/spirv/emit_spirv_atomic.cpp | 3 +- .../spirv/emit_spirv_context_get_set.cpp | 18 ++++----- .../backend/spirv/emit_spirv_image.cpp | 9 ++++- .../backend/spirv/emit_spirv_instructions.h | 2 + .../backend/spirv/spirv_emit_context.cpp | 35 +++++++++++++++++- .../frontend/structured_control_flow.cpp | 8 ++-- .../frontend/translate/data_share.cpp | 5 +++ .../frontend/translate/translate.cpp | 3 ++ .../frontend/translate/translate.h | 1 + src/shader_recompiler/ir/attribute.h | 6 +-- src/shader_recompiler/ir/basic_block.cpp | 4 +- src/shader_recompiler/ir/microinstruction.cpp | 17 +++++++-- src/shader_recompiler/ir/opcodes.inc | 1 + .../ir/passes/constant_propogation_pass.cpp | 6 +-- .../ir/passes/resource_tracking_pass.cpp | 23 ++++++++++++ src/shader_recompiler/ir/post_order.cpp | 2 +- src/shader_recompiler/ir/reg.h | 6 +-- src/shader_recompiler/ir/value.cpp | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 4 ++ .../renderer_vulkan/vk_instance.cpp | 7 ++++ .../texture_cache/texture_cache.cpp | 2 +- 24 files changed, 165 insertions(+), 66 deletions(-) diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 2b526eed..a9b9d14a 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -63,6 +63,7 @@ int PS4_SYSV_ABI scePthreadAttrInit(ScePthreadAttr* attr) { SceKernelSchedParam param{}; param.sched_priority = 700; + result = pthread_attr_setstacksize(&(*attr)->pth_attr, 2_MB); result = (result == 0 ? scePthreadAttrSetinheritsched(attr, 4) : result); result = (result == 0 ? scePthreadAttrSetschedparam(attr, ¶m) : result); result = (result == 0 ? scePthreadAttrSetschedpolicy(attr, SCHED_OTHER) : result); @@ -921,36 +922,34 @@ int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr attr = g_pthread_cxt->GetDefaultAttr(); } + if (name != nullptr && std::string_view(name) == "RenderMixThread") { + printf("bad\n"); + } + *thread = pthread_pool->Create(); if ((*thread)->attr != nullptr) { scePthreadAttrDestroy(&(*thread)->attr); } - scePthreadAttrInit(&(*thread)->attr); int result = pthread_copy_attributes(&(*thread)->attr, attr); + ASSERT(result == 0); - if (result == 0) { - if (name != NULL) { - (*thread)->name = name; - } else { - (*thread)->name = "no-name"; - } - (*thread)->entry = start_routine; - (*thread)->arg = arg; - (*thread)->is_almost_done = false; - (*thread)->is_detached = (*attr)->detached; - (*thread)->is_started = false; - - result = pthread_create(&(*thread)->pth, &(*attr)->pth_attr, run_thread, *thread); + if (name != NULL) { + (*thread)->name = name; + } else { + (*thread)->name = "no-name"; } + (*thread)->entry = start_routine; + (*thread)->arg = arg; + (*thread)->is_almost_done = false; + (*thread)->is_detached = (*attr)->detached; + (*thread)->is_started = false; + + pthread_attr_setstacksize(&(*attr)->pth_attr, 2_MB); + result = pthread_create(&(*thread)->pth, &(*attr)->pth_attr, run_thread, *thread); - if (result == 0) { - while (!(*thread)->is_started) { - std::this_thread::sleep_for(std::chrono::microseconds(1000)); - } - } LOG_INFO(Kernel_Pthread, "thread create name = {}", (*thread)->name); switch (result) { diff --git a/src/core/libraries/system/msgdialog.cpp b/src/core/libraries/system/msgdialog.cpp index 1c8653f5..142aa699 100644 --- a/src/core/libraries/system/msgdialog.cpp +++ b/src/core/libraries/system/msgdialog.cpp @@ -61,7 +61,12 @@ int PS4_SYSV_ABI sceMsgDialogUpdateStatus() { return ORBIS_OK; } +int PS4_SYSV_ABI sceImeDialogGetStatus() { + return 1; +} + void RegisterlibSceMsgDialog(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("IADmD4tScBY", "libSceImeDialog", 1, "libSceImeDialog", 1, 1, sceImeDialogGetStatus); LIB_FUNCTION("HTrcDKlFKuM", "libSceMsgDialog", 1, "libSceMsgDialog", 1, 1, sceMsgDialogClose); LIB_FUNCTION("Lr8ovHH9l6A", "libSceMsgDialog", 1, "libSceMsgDialog", 1, 1, sceMsgDialogGetResult); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index e1931a9c..f53b24ca 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -85,7 +85,7 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { #include "shader_recompiler/ir/opcodes.inc" #undef OPCODE } - throw LogicError("Invalid opcode {}", inst->GetOpcode()); + UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode()); } Id TypeId(const EmitContext& ctx, IR::Type type) { @@ -177,6 +177,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); switch (program.info.stage) { case Stage::Compute: { const std::array workgroup_size{program.info.workgroup_size}; @@ -272,47 +273,47 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { void EmitReference(EmitContext&) {} void EmitPhiMove(EmitContext&) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetScc(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetExec(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetVcc(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetVccLo(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetVccHi(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetScc(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetExec(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetVcc(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetVccLo(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetVccHi(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 68867956..e0bc4b77 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -15,8 +15,7 @@ std::pair AtomicArgs(EmitContext& ctx) { Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { const auto& texture = ctx.images[handle & 0xFFFF]; - const Id image = ctx.OpLoad(texture.image_type, texture.id); - const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.ConstU32(0U))}; + const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, texture.id, coords, ctx.ConstU32(0U))}; const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ccddbff5..e2805a6f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -45,39 +45,39 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) { } void EmitGetThreadBitScalarReg(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetThreadBitScalarReg(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetScalarRegister(EmitContext&) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetScalarRegister(EmitContext&) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetVectorRegister(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetVectorRegister(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitSetGotoVariable(EmitContext&) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } void EmitGetGotoVariable(EmitContext&) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } Id EmitReadConst(EmitContext& ctx) { - throw LogicError("Unreachable instruction"); + UNREACHABLE_MSG("Unreachable instruction"); } Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 7f9f072a..d063ab55 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -81,12 +81,17 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id of const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); if (Sirit::ValidId(lod)) { - return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod); + return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, spv::ImageOperandsMask::Lod, lod)); } else { - return ctx.OpImageFetch(ctx.F32[4], image, coords); + return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords)); } } +Id EmitImageFetchU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, + Id ms) { + return Id{}; +} + Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 01672d0f..6cf87045 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -353,6 +353,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, Id ms); +Id EmitImageFetchU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, + Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 1d60a852..f52f67c8 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -296,8 +296,41 @@ void EmitContext::DefineBuffers(const Info& info) { } } +spv::ImageFormat GetFormat(const AmdGpu::Image& image) { + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { + return spv::ImageFormat::R32ui; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::R32f; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::Rg32f; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::R16f; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::Rg16f; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { + return spv::ImageFormat::Rg8Snorm; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::Rgba16f; + } + UNREACHABLE(); +} + Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { - const auto format = spv::ImageFormat::Unknown; + const auto image = ctx.info.ReadUd(desc.sgpr_base, desc.dword_offset); + const auto format = desc.is_storage ? GetFormat(image) : spv::ImageFormat::Unknown; const u32 sampled = desc.is_storage ? 2 : 1; switch (desc.type) { case AmdGpu::ImageType::Color1D: diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index df9fe8b6..9b5cc3e6 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -187,7 +187,7 @@ std::string DumpExpr(const Statement* stmt) { case StatementType::Not: case StatementType::Or: case StatementType::Variable: - throw LogicError("Statement can't be printed"); + UNREACHABLE_MSG("Statement can't be printed"); } } return ret; @@ -335,7 +335,7 @@ private: } // Expensive operation: if (!AreSiblings(goto_stmt, label_stmt)) { - throw LogicError("Goto is not a sibling with the label"); + UNREACHABLE_MSG("Goto is not a sibling with the label"); } // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { @@ -451,7 +451,7 @@ private: case StatementType::Loop: return MoveOutwardLoop(goto_stmt); default: - throw LogicError("Invalid outward movement"); + UNREACHABLE_MSG("Invalid outward movement"); } } @@ -486,7 +486,7 @@ private: case StatementType::Loop: break; default: - throw LogicError("Invalid inward movement"); + UNREACHABLE_MSG("Invalid inward movement"); } Tree& nested_tree{label_nested_stmt->children}; Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index c6ee79b0..a8ecc0c1 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -61,4 +61,9 @@ void Translator::S_BARRIER() { ir.Barrier(); } +void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + SetDst(inst.dst[0], src0); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index f45a5960..24dc0495 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -867,6 +867,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::DS_WRITE2_B32: translator.DS_WRITE(32, false, true, inst); break; + case Opcode::V_READFIRSTLANE_B32: + translator.V_READFIRSTLANE_B32(inst); + break; case Opcode::S_NOP: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 28a84522..83148077 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -166,6 +166,7 @@ public: void DS_SWIZZLE_B32(const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); + void V_READFIRSTLANE_B32(const GcnInst& inst); void S_BARRIER(); // MIMG diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index b148578f..0cfbc421 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -4,8 +4,8 @@ #pragma once #include +#include "common/assert.h" #include "common/types.h" -#include "shader_recompiler/exception.h" namespace Shader::IR { @@ -88,10 +88,10 @@ constexpr size_t NumParams = 32; [[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) { const int result{static_cast(attr) + num}; if (result > static_cast(Attribute::Param31)) { - throw LogicError("Overflow on register arithmetic"); + UNREACHABLE_MSG("Overflow on register arithmetic"); } if (result < static_cast(Attribute::RenderTarget0)) { - throw LogicError("Underflow on register arithmetic"); + UNREACHABLE_MSG("Underflow on register arithmetic"); } return static_cast(result); } diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index 39174c56..622a6249 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -39,10 +39,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, void Block::AddBranch(Block* block) { if (std::ranges::find(imm_successors, block) != imm_successors.end()) { - throw LogicError("Successor already inserted"); + UNREACHABLE_MSG("Successor already inserted"); } if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { - throw LogicError("Predecessor already inserted"); + UNREACHABLE_MSG("Predecessor already inserted"); } imm_successors.push_back(block); block->imm_predecessors.push_back(this); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index da4e2e75..a796390a 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -53,6 +53,17 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::StoreBufferF32x4: case Opcode::StoreBufferU32: case Opcode::ImageWrite: + case Opcode::ImageAtomicIAdd32: + case Opcode::ImageAtomicSMin32: + case Opcode::ImageAtomicUMin32: + case Opcode::ImageAtomicSMax32: + case Opcode::ImageAtomicUMax32: + case Opcode::ImageAtomicInc32: + case Opcode::ImageAtomicDec32: + case Opcode::ImageAtomicAnd32: + case Opcode::ImageAtomicOr32: + case Opcode::ImageAtomicXor32: + case Opcode::ImageAtomicExchange32: return true; default: return false; @@ -61,7 +72,7 @@ bool Inst::MayHaveSideEffects() const noexcept { bool Inst::AreAllArgsImmediates() const { if (op == Opcode::Phi) { - throw LogicError("Testing for all arguments are immediates on phi instruction"); + UNREACHABLE_MSG("Testing for all arguments are immediates on phi instruction"); } return std::all_of(args.begin(), args.begin() + NumArgs(), [](const IR::Value& value) { return value.IsImmediate(); }); @@ -91,7 +102,7 @@ void Inst::SetArg(size_t index, Value value) { Block* Inst::PhiBlock(size_t index) const { if (op != Opcode::Phi) { - throw LogicError("{} is not a Phi instruction", op); + UNREACHABLE_MSG("{} is not a Phi instruction", op); } if (index >= phi_args.size()) { throw InvalidArgument("Out of bounds argument index {} in phi instruction"); @@ -143,7 +154,7 @@ void Inst::ReplaceUsesWith(Value replacement) { void Inst::ReplaceOpcode(IR::Opcode opcode) { if (opcode == IR::Opcode::Phi) { - throw LogicError("Cannot transition into Phi"); + UNREACHABLE_MSG("Cannot transition into Phi"); } if (op == Opcode::Phi) { // Transition out of phi arguments into non-phi diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 32cdc326..d2f15336 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -287,6 +287,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, Opaq OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageFetchU32, U32x4, Opaque, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) diff --git a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp index b715bcd9..ab3a8471 100644 --- a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp @@ -324,7 +324,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::BitFieldUExtract: FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > 32) { - throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, + UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, base, shift, count); } return (base >> shift) & ((1U << count) - 1); @@ -336,7 +336,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { const size_t left_shift{32 - back_shift}; const size_t right_shift{static_cast(32 - count)}; if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) { - throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, + UNREACHABLE_MSG("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, base, shift, count); } return static_cast((base << left_shift) >> right_shift); @@ -345,7 +345,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::BitFieldInsert: FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) { if (bits >= 32 || offset >= 32) { - throw LogicError("Undefined result in {}({}, {}, {}, {})", + UNREACHABLE_MSG("Undefined result in {}({}, {}, {}, {})", IR::Opcode::BitFieldInsert, base, insert, offset, bits); } return (base & ~(~(~0u << bits) << offset)) | (insert << offset); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 7f91a63c..10e8a31a 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -82,6 +82,7 @@ bool IsImageInstruction(const IR::Inst& inst) { case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageSampleDrefImplicitLod: case IR::Opcode::ImageFetch: + case IR::Opcode::ImageFetchU32: case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageQueryDimensions: @@ -89,6 +90,17 @@ bool IsImageInstruction(const IR::Inst& inst) { case IR::Opcode::ImageGradient: case IR::Opcode::ImageRead: case IR::Opcode::ImageWrite: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: return true; default: return false; @@ -99,6 +111,17 @@ bool IsImageStorageInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::ImageWrite: case IR::Opcode::ImageRead: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: return true; default: return false; diff --git a/src/shader_recompiler/ir/post_order.cpp b/src/shader_recompiler/ir/post_order.cpp index 5ab72aa2..9f588690 100644 --- a/src/shader_recompiler/ir/post_order.cpp +++ b/src/shader_recompiler/ir/post_order.cpp @@ -14,7 +14,7 @@ BlockList PostOrder(const AbstractSyntaxNode& root) { BlockList post_order_blocks; if (root.type != AbstractSyntaxNode::Type::Block) { - throw LogicError("First node in abstract syntax list root is not a block"); + UNREACHABLE_MSG("First node in abstract syntax list root is not a block"); } Block* const first_block{root.data.block}; visited.insert(first_block); diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index f3000528..ae38ecf3 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -3,9 +3,9 @@ #pragma once +#include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" -#include "shader_recompiler/exception.h" #include "video_core/amdgpu/pixel_format.h" namespace Shader::IR { @@ -428,10 +428,10 @@ template [[nodiscard]] constexpr Reg operator+(Reg reg, int num) { const int result{static_cast(reg) + num}; if (result >= static_cast(Reg::Max)) { - throw LogicError("Overflow on register arithmetic"); + UNREACHABLE_MSG("Overflow on register arithmetic"); } if (result < 0) { - throw LogicError("Underflow on register arithmetic"); + UNREACHABLE_MSG("Underflow on register arithmetic"); } return static_cast(result); } diff --git a/src/shader_recompiler/ir/value.cpp b/src/shader_recompiler/ir/value.cpp index a455f8b1..9cbb9e7c 100644 --- a/src/shader_recompiler/ir/value.cpp +++ b/src/shader_recompiler/ir/value.cpp @@ -83,7 +83,7 @@ bool Value::operator==(const Value& other) const { case Type::F64x4: break; } - throw LogicError("Invalid type {}", type); + UNREACHABLE_MSG("Invalid type {}", type); } bool Value::operator!=(const Value& other) const { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 1e06881b..37d9188c 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -396,6 +396,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Snorm) { return vk::Format::eR16G16B16A16Snorm; } + if (data_format == AmdGpu::DataFormat::Format32_32 && + num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR32G32Uint; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 1fc7790d..0e846d66 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -156,6 +156,7 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); + add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); @@ -237,6 +238,12 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceDepthClipControlFeaturesEXT{ .depthClipControl = true, }, + vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{ + .workgroupMemoryExplicitLayout = true, + .workgroupMemoryExplicitLayoutScalarBlockLayout = true, + .workgroupMemoryExplicitLayout8BitAccess = true, + .workgroupMemoryExplicitLayout16BitAccess = true, + } }; if (!color_write_en) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8cd6f893..7ea610db 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -402,7 +402,7 @@ void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { const u32 interval_size = interval_end_addr - interval_start_addr; void* addr = reinterpret_cast(interval_start_addr); if (delta > 0 && count == delta) { - mprotect(addr, interval_size, PAGE_READONLY); + //mprotect(addr, interval_size, PAGE_READONLY); } else if (delta < 0 && count == -delta) { mprotect(addr, interval_size, PAGE_READWRITE); } else {