diff --git a/.gitmodules b/.gitmodules index 14f7520f..4a8ac3b8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -51,7 +51,7 @@ url = https://github.com/zyantific/zydis.git [submodule "externals/sirit"] path = externals/sirit - url = https://github.com/raphaelthegreat/sirit.git + url = https://github.com/shadps4-emu/sirit [submodule "externals/xxhash"] path = externals/xxhash url = https://github.com/Cyan4973/xxHash.git diff --git a/externals/sirit b/externals/sirit index fc65ebb5..505cc66a 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit fc65ebb5b56b849b1205d5baa2ca38440096652d +Subproject commit 505cc66a2be70b268c1700fef4d5327a5fe46494 diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index dc2e8f02..6ea076ed 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -110,6 +110,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { CLS(Frontend) \ CLS(Render) \ SUB(Render, Vulkan) \ + SUB(Render, Recompiler) \ CLS(Input) \ CLS(Tty) \ CLS(Loader) diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 687bcfce..3d9c87b1 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -77,6 +77,7 @@ enum class Class : u8 { Frontend, ///< Emulator UI Render, ///< Video Core Render_Vulkan, ///< Vulkan backend + Render_Recompiler, ///< Shader recompiler Loader, ///< ROM loader Input, ///< Input emulation Tty, ///< Debug output from emu diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 99547ff4..4ebbd465 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -185,7 +185,7 @@ Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) } void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - UNREACHABLE(); + EmitStoreBufferU32(ctx, inst, handle, address, value); } void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 5aa1af55..204c89fa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -240,6 +240,8 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitSMulExt(EmitContext& ctx, Id a, Id b); +Id EmitUMulExt(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitSDiv32(EmitContext& ctx, Id a, Id b); Id EmitUDiv32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 74951e16..1d52a3ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -68,6 +68,14 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U64, a, b); } +Id EmitSMulExt(EmitContext& ctx, Id a, Id b) { + return ctx.OpSMulExtended(ctx.full_result_i32x2, a, b); +} + +Id EmitUMulExt(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMulExtended(ctx.full_result_u32x2, a, b); +} + Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 39e552c3..1ac25886 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -104,6 +104,9 @@ void EmitContext::DefineArithmeticTypes() { output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); + + full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2"); + full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2"); } void EmitContext::DefineInterfaces(const IR::Program& program) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index c4bc722c..49dd181f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -138,6 +138,9 @@ public: VectorIds U32{}; VectorIds U1{}; + Id full_result_i32x2; + Id full_result_u32x2; + Id true_value{}; Id false_value{}; Id u32_one_value{}; diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 49fe2052..df9fe8b6 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -823,6 +823,7 @@ IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool Translator::exec_contexts{}; @@ -210,6 +214,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) Translator translator{block, info}; for (const auto& inst : inst_list) { switch (inst.opcode) { + case Opcode::S_MOVK_I32: + translator.S_MOVK(inst); + break; case Opcode::S_MOV_B32: translator.S_MOV(inst); break; @@ -421,6 +428,12 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MAX_F32: translator.V_MAX_F32(inst); break; + case Opcode::V_MAX_I32: + translator.V_MAX_U32(true, inst); + break; + case Opcode::V_MAX_U32: + translator.V_MAX_U32(false, inst); + break; case Opcode::V_RSQ_F32: translator.V_RSQ_F32(inst); break; @@ -581,8 +594,11 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_ADD_I32: translator.S_ADD_I32(inst); break; + case Opcode::V_MUL_HI_U32: + translator.V_MUL_HI_U32(false, inst); + break; case Opcode::V_MUL_LO_I32: - translator.V_MUL_LO_I32(inst); + translator.V_MUL_LO_U32(inst); break; case Opcode::V_SAD_U32: translator.V_SAD_U32(inst); @@ -641,6 +657,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_BFM_B32: translator.S_BFM_B32(inst); break; + case Opcode::V_TRUNC_F32: + translator.V_TRUNC_F32(inst); + break; case Opcode::S_NOP: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: @@ -654,7 +673,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) break; default: const u32 opcode = u32(inst.opcode); - UNREACHABLE_MSG("Unknown opcode {}", opcode); + LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})", + magic_enum::enum_name(inst.opcode), opcode); + info.translation_failed = true; } } } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 64e45a2c..10a4580f 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -34,6 +34,7 @@ public: void EmitFetch(const GcnInst& inst); // Scalar ALU + void S_MOVK(const GcnInst& inst); void S_MOV(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst); @@ -79,6 +80,7 @@ public: void V_FMA_F32(const GcnInst& inst); void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst); void V_MAX_F32(const GcnInst& inst); + void V_MAX_U32(bool is_signed, const GcnInst& inst); void V_RSQ_F32(const GcnInst& inst); void V_SIN_F32(const GcnInst& inst); void V_LOG_F32(const GcnInst& inst); @@ -96,7 +98,7 @@ public: void V_SUBREV_I32(const GcnInst& inst); void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); void V_LSHRREV_B32(const GcnInst& inst); - void V_MUL_LO_I32(const GcnInst& inst); + void V_MUL_HI_U32(bool is_signed, const GcnInst& inst); void V_SAD_U32(const GcnInst& inst); void V_BFE_U32(const GcnInst& inst); void V_MAD_I32_I24(const GcnInst& inst); @@ -112,6 +114,7 @@ public: void V_CVT_I32_F32(const GcnInst& inst); void V_MIN_I32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst); + void V_TRUNC_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7a5bd49e..7bad5799 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -197,6 +197,12 @@ void Translator::V_MAX_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(src0, src1)); } +void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed)); +} + void Translator::V_RSQ_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0], true)}; SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); @@ -320,10 +326,11 @@ void Translator::V_LSHRREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.ShiftRightLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F)))); } -void Translator::V_MUL_LO_I32(const GcnInst& inst) { +void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - SetDst(inst.dst[0], ir.IMul(src0, src1)); + const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)}; + SetDst(inst.dst[0], hi); } void Translator::V_SAD_U32(const GcnInst& inst) { @@ -418,4 +425,9 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IMul(src0, src1)); } +void Translator::V_TRUNC_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPTrunc(src0)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index aad9ba84..00f7fcda 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -216,18 +216,22 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns const IR::VectorReg src_reg{inst.src[1].code}; switch (num_dwords) { case 1: - value = ir.GetVectorReg(src_reg); + value = ir.GetVectorReg(src_reg); break; case 2: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), + ir.GetVectorReg(src_reg + 1)); break; case 3: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), + ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2)); break; case 4: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), + ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2), + ir.GetVectorReg(src_reg + 3)); break; } ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 276269af..d7e1d477 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -880,6 +880,10 @@ U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { } } +IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) { + return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b); +} + U32 IREmitter::IMul(const U32& a, const U32& b) { return Inst(Opcode::IMul32, a, b); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 3e951f82..917de458 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -146,6 +146,7 @@ public: [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); + [[nodiscard]] IR::Value IMulExt(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 INeg(const U32U64& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 71933096..18c0ce0b 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -197,6 +197,8 @@ OPCODE(IAdd64, U64, U64, OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) +OPCODE(SMulExt, U32x2, U32, U32, ) +OPCODE(UMulExt, U32x2, U32, U32, ) OPCODE(SDiv32, U32, U32, U32, ) OPCODE(UDiv32, U32, U32, U32, ) OPCODE(INeg32, U32, U32, ) diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index c7318460..63474958 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -127,6 +127,7 @@ struct Info { Stage stage; bool uses_group_quad{}; + bool translation_failed{}; // indicates that shader has unsupported instructions template T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index feb39a30..f82a976a 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -312,6 +312,12 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Srgb) { return vk::Format::eBc3SrgbBlock; } + if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc3UnormBlock; + } + if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc4UnormBlock; + } if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR16G16B16A16Sint; @@ -322,9 +328,6 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc1RgbaUnormBlock; } - if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) { - return vk::Format::eBc3UnormBlock; - } if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Uint) { return vk::Format::eR8G8B8A8Uint; @@ -361,22 +364,19 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, "Unsupported component swap mode {}", static_cast(comp_swap)); const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; - - switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm : base_format; - case vk::Format::eB8G8R8A8Unorm: - return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm : base_format; - case vk::Format::eR8G8B8A8Srgb: - return comp_swap_alt ? vk::Format::eB8G8R8A8Unorm - : is_vo_surface ? vk::Format::eR8G8B8A8Unorm - : base_format; - case vk::Format::eB8G8R8A8Srgb: - return comp_swap_alt ? vk::Format::eR8G8B8A8Unorm - : is_vo_surface ? vk::Format::eB8G8R8A8Unorm - : base_format; + if (comp_swap_alt) { + switch (base_format) { + case vk::Format::eR8G8B8A8Unorm: + return vk::Format::eB8G8R8A8Unorm; + case vk::Format::eB8G8R8A8Unorm: + return vk::Format::eR8G8B8A8Unorm; + case vk::Format::eR8G8B8A8Srgb: + return is_vo_surface ? vk::Format::eB8G8R8A8Unorm : vk::Format::eB8G8R8A8Srgb; + case vk::Format::eB8G8R8A8Srgb: + return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb; + } } - UNREACHABLE_MSG("Unsupported base format {}", vk::to_string(base_format)); + return base_format; } vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d5a839b..cb72edaf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -189,7 +189,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { inst_pool.ReleaseContents(); // Recompile shader to IR. - LOG_INFO(Render_Vulkan, "Compiling {} shader {:#X}", stage, hash); + LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); @@ -224,6 +224,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline() { inst_pool.ReleaseContents(); // Recompile shader to IR. + LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key); const Shader::Info info = MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs); auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 72566a88..240dad7a 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -85,7 +85,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info } const vk::ImageViewCreateInfo image_view_ci = { - .pNext = nullptr, + .pNext = usage_override ? &usage_ci : nullptr, .image = image.image, .viewType = info.type, .format = format,