diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index c5237d0a..3393138d 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -727,6 +727,9 @@ int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) { LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result); + delete *cond; + *cond = nullptr; + switch (result) { case 0: return SCE_OK; @@ -1142,7 +1145,7 @@ int PS4_SYSV_ABI scePthreadCondWait(ScePthreadCond* cond, ScePthreadMutex* mutex } int result = pthread_cond_wait(&(*cond)->cond, &(*mutex)->pth_mutex); - LOG_INFO(Kernel_Pthread, "scePthreadCondWait, result={}", result); + LOG_DEBUG(Kernel_Pthread, "scePthreadCondWait, result={}", result); switch (result) { case 0: @@ -1162,7 +1165,7 @@ int PS4_SYSV_ABI scePthreadCondattrDestroy(ScePthreadCondattr* attr) { } int result = pthread_condattr_destroy(&(*attr)->cond_attr); - LOG_INFO(Kernel_Pthread, "scePthreadCondattrDestroy: result = {} ", result); + LOG_DEBUG(Kernel_Pthread, "scePthreadCondattrDestroy: result = {} ", result); switch (result) { case 0: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index bd4ac066..38afd90f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -11,7 +11,7 @@ Id SubgroupScope(EmitContext& ctx) { } Id EmitWarpId(EmitContext& ctx) { - return ctx.OpLoad(ctx.U32[1], ctx.subgroup_id); + UNREACHABLE(); } Id EmitLaneId(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index f7b30052..8ca8b7a3 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -225,7 +225,6 @@ void EmitContext::DefineInputs(const Info& 
info) { break; } case Stage::Fragment: - subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input); subgroup_local_invocation_id = DefineVariable( U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 34c13d3f..2aa1bf78 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -180,7 +180,6 @@ public: Id workgroup_id{}; Id local_invocation_id{}; - Id subgroup_id{}; Id subgroup_local_invocation_id{}; Id image_u32{}; diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 14837166..532e024e 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -48,7 +48,8 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn IR::VectorReg dst_reg{inst.dst[0].code}; if (is_pair) { // Pair loads are either 32 or 64-bit - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const u32 adj = bit_size == 32 ? 
4 : 8; + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0); if (bit_size == 32) { ir.SetVectorReg(dst_reg++, IR::U32{data0}); @@ -56,7 +57,7 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)}); ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)}); } - const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1); if (bit_size == 32) { ir.SetVectorReg(dst_reg++, IR::U32{data1}); @@ -65,11 +66,13 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)}); } } else if (bit_size == 64) { - const IR::Value data = ir.LoadShared(bit_size, is_signed, addr); + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)}); } else { - const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)}; + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)}; ir.SetVectorReg(dst_reg, data); } } @@ -79,7 +82,8 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI const IR::VectorReg data0{inst.src[1].code}; const IR::VectorReg data1{inst.src[2].code}; if (is_pair) { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const u32 adj = bit_size == 32 ? 
4 : 8; + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); if (bit_size == 32) { ir.WriteShared(32, ir.GetVectorReg(data0), addr0); } else { @@ -87,7 +91,7 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI 64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)), addr0); } - const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); if (bit_size == 32) { ir.WriteShared(32, ir.GetVectorReg(data1), addr1); } else { @@ -96,11 +100,13 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI addr1); } } else if (bit_size == 64) { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); const IR::Value data = ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); - ir.WriteShared(bit_size, data, addr); + ir.WriteShared(bit_size, data, addr0); } else { - ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr); + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); } } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 8d1b7683..fe4457d2 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -125,6 +125,7 @@ public: void V_ADD_F32(const GcnInst& inst); void V_CVT_OFF_F32_I4(const GcnInst& inst); void V_MED3_F32(const GcnInst& inst); + void V_MED3_I32(const GcnInst& inst); void V_FLOOR_F32(const GcnInst& inst); void V_SUB_F32(const GcnInst& inst); void V_RCP_F32(const GcnInst& inst); @@ -159,6 +160,7 @@ public: void V_SUB_I32(const GcnInst& inst); void V_LSHR_B32(const GcnInst& inst); void V_ASHRREV_I32(const GcnInst& inst); + void V_ASHR_I32(const GcnInst& inst); void V_MAD_U32_U24(const 
GcnInst& inst); void V_RNDNE_F32(const GcnInst& inst); void V_BCNT_U32_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 669ef7ca..89428c44 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -24,6 +24,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_LSHR_B32(inst); case Opcode::V_ASHRREV_I32: return V_ASHRREV_I32(inst); + case Opcode::V_ASHR_I32: + return V_ASHR_I32(inst); case Opcode::V_LSHRREV_B32: return V_LSHRREV_B32(inst); case Opcode::V_NOT_B32: @@ -183,6 +185,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_ADD_F32(inst); case Opcode::V_MED3_F32: return V_MED3_F32(inst); + case Opcode::V_MED3_I32: + return V_MED3_I32(inst); case Opcode::V_FLOOR_F32: return V_FLOOR_F32(inst); case Opcode::V_SUB_F32: @@ -479,6 +483,14 @@ void Translator::V_MED3_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); } +void Translator::V_MED3_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2); + SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx)); +} + void Translator::V_FLOOR_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0], true)}; const IR::VectorReg dst_reg{inst.dst[0].code}; @@ -760,6 +772,12 @@ void Translator::V_ASHRREV_I32(const GcnInst& inst) { SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); } +void Translator::V_ASHR_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.ShiftRightArithmetic(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F)))); +} + void Translator::V_MAD_U32_U24(const GcnInst& inst) { V_MAD_I32_I24(inst, 
false); } @@ -925,25 +943,12 @@ void Translator::V_FFBL_B32(const GcnInst& inst) { void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 lane_id = ir.LaneId(); - - const auto [warp_half, mask_shift] = [&]() -> std::pair<IR::U32, IR::U32> { - if (profile.subgroup_size == 32) { - const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1)); - return std::make_pair(warp_half, lane_id); - } - const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5)); - const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F)); - return std::make_pair(warp_half, mask_shift); - }(); - - const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1)); - const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0)); - const IR::U32 mask = IR::U32{ir.Select(is_odd_warp, is_low ? ir.Imm32(~0U) : thread_mask, - is_low ? thread_mask : ir.Imm32(0))}; - const IR::U32 masked_value = ir.BitwiseAnd(src0, mask); - const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value)); - SetDst(inst.dst[0], result); + if (!is_low) { + ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U); + return; + } + ASSERT(src0.IsImmediate() && src0.U32() == ~0U); + SetDst(inst.dst[0], ir.LaneId()); } } // namespace Shader::Gcn diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index b0285809..400af031 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -6,7 +6,7 @@ #include <array> #include <condition_variable> #include <coroutine> -#include <functional> +#include <exception> #include <future> #include <mutex> #include <span> @@ -1040,7 +1040,11 @@ private: return {}; } void unhandled_exception() { - UNREACHABLE(); + try { + std::rethrow_exception(std::current_exception()); + } catch (const std::exception& e) { + UNREACHABLE_MSG("Unhandled exception: {}", e.what()); + } } void return_void() {} struct empty {}; diff --git 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 34f1e9cc..d8e5f7fa 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -94,7 +94,9 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s const auto vsharp = buffer.GetVsharp(info); const u32 size = vsharp.GetSize(); const VAddr address = vsharp.base_address; - texture_cache.OnCpuWrite(address); + if (buffer.is_storage) { + texture_cache.OnCpuWrite(address); + } const u32 offset = staging.Copy(address, size, buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment());