From 6428b04f0645436a79e2deaabd88334c345b108d Mon Sep 17 00:00:00 2001
From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com>
Date: Fri, 7 Jun 2024 15:30:02 +0300
Subject: [PATCH] shader_recompiler: Add more instructions

* Also fix some minor issues with a few existing instructions
---
 .../backend/spirv/emit_spirv_image.cpp        | 23 +++++++++++++------
 .../backend/spirv/emit_spirv_instructions.h   |  8 +++----
 .../frontend/translate/scalar_memory.cpp      | 12 ++++++++--
 .../frontend/translate/translate.cpp          | 20 +++++++++++++++-
 .../frontend/translate/translate.h            |  2 ++
 .../frontend/translate/vector_alu.cpp         | 14 +++++++++--
 .../ir/passes/resource_tracking_pass.cpp      |  9 ++++++++
 7 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index ac8f22af..7c21e6fc 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -12,13 +12,17 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
     const Id image = ctx.OpLoad(texture.image_type, texture.id);
     const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
     const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
-    const auto info = inst->Flags<IR::TextureInstInfo>();
     return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
 }
 
-Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                              Id lod, const IR::Value& offset) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
+                              Id offset) {
+    const auto& texture = ctx.images[handle & 0xFFFF];
+    const Id image = ctx.OpLoad(texture.image_type, texture.id);
+    const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
+    const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
+    return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords,
+                                        spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
 }
 
 Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
@@ -26,9 +30,14 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
-                                  Id coords, Id dref, Id lod, const IR::Value& offset) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
+                                  Id bias_lc, Id offset) {
+    const auto& texture = ctx.images[handle & 0xFFFF];
+    const Id image = ctx.OpLoad(texture.image_type, texture.id);
+    const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
+    const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
+    return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref,
+                                            spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
 }
 
 Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
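
(Illustrative aside, not part of the patch.) The emitters above assume the 32-bit texture handle packs the image slot in the low 16 bits and the sampler slot in the high 16 bits, which is why they index ctx.images[handle & 0xFFFF] and ctx.samplers[handle >> 16]; the explicit-LOD paths also force the LOD operand to a constant 0.0 for now. A minimal C++ sketch of the packing convention, with hypothetical helper names:

    // Illustrative only; helper names are hypothetical.
    #include <cstdint>

    constexpr uint32_t ImageIndex(uint32_t handle) {
        return handle & 0xFFFF; // matches ctx.images[handle & 0xFFFF]
    }

    constexpr uint32_t SamplerIndex(uint32_t handle) {
        return handle >> 16; // matches ctx.samplers[handle >> 16]
    }

    static_assert(ImageIndex(0x00010003u) == 3);
    static_assert(SamplerIndex(0x00010003u) == 1);
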
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 728dd2bc..33a44935 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -334,12 +334,12 @@
 Id EmitConvertF64U64(EmitContext& ctx, Id value);
 Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
                               Id offset);
-Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                              Id lod, const IR::Value& offset);
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
+                              Id offset);
 Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
                                   Id coords, Id dref, Id bias_lc, const IR::Value& offset);
-Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
-                                  Id coords, Id dref, Id lod, const IR::Value& offset);
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
+                                  Id bias_lc, Id offset);
 Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                    const IR::Value& offset, const IR::Value& offset2);
 Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
index 14028b76..2cf5c5b2 100644
--- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
@@ -18,10 +18,18 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
 }
 
 void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
+    static constexpr u32 SQ_SRC_LITERAL = 0xFF;
     const auto& smrd = inst.control.smrd;
     const IR::ScalarReg sbase{inst.src[0].code * 2};
-    const IR::U32 dword_offset =
-        smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
+    const IR::U32 dword_offset = [&] -> IR::U32 {
+        if (smrd.imm) {
+            return ir.Imm32(smrd.offset);
+        }
+        if (smrd.offset == SQ_SRC_LITERAL) {
+            return ir.Imm32(inst.src[1].code);
+        }
+        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
+    }();
     const IR::Value vsharp = ir.GetScalarReg(sbase);
     IR::ScalarReg dst_reg{inst.dst[0].code};
     for (u32 i = 0; i < num_dwords; i++) {
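
(Illustrative aside, not part of the patch.) The new S_BUFFER_LOAD_DWORD path resolves the SMRD dword offset from one of three sources; a standalone sketch of that decision follows, with hypothetical names. The literal case mirrors how the patch reads inst.src[1].code, and the SGPR case mirrors the right-shift by two that turns a byte offset into a dword offset.

    // Illustrative only; not part of the patch.
    #include <cstdint>

    uint32_t ResolveSmrdDwordOffset(bool imm, uint32_t offset_field, uint32_t literal,
                                    uint32_t sgpr_byte_offset) {
        constexpr uint32_t SQ_SRC_LITERAL = 0xFF;
        if (imm) {
            return offset_field; // immediate field is already a dword offset
        }
        if (offset_field == SQ_SRC_LITERAL) {
            return literal; // literal operand decoded with the instruction
        }
        return sgpr_byte_offset >> 2; // SGPR holds a byte offset; convert to dwords
    }
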
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 2abc87a6..510b4b28 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -129,7 +129,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
         }
         break;
     case OperandField::VccHi:
-        value = ir.GetVccHi();
+        if (force_flt) {
+            value = ir.BitCast<IR::F32>(ir.GetVccHi());
+        } else {
+            value = ir.GetVccHi();
+        }
         break;
     default:
         UNREACHABLE();
@@ -297,6 +301,8 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::V_MADAK_F32: // Yes these can share the opcode
             translator.V_FMA_F32(inst);
             break;
+        case Opcode::IMAGE_SAMPLE_C_LZ:
+        case Opcode::IMAGE_SAMPLE_LZ:
         case Opcode::IMAGE_SAMPLE:
             translator.IMAGE_SAMPLE(inst);
             break;
@@ -351,9 +357,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::S_CMP_LG_U32:
            translator.S_CMP(ConditionOp::LG, false, inst);
            break;
+        case Opcode::S_CMP_LT_I32:
+            translator.S_CMP(ConditionOp::LT, true, inst);
+            break;
         case Opcode::S_CMP_LG_I32:
             translator.S_CMP(ConditionOp::LG, true, inst);
             break;
+        case Opcode::S_CMP_GT_I32:
+            translator.S_CMP(ConditionOp::GT, true, inst);
+            break;
         case Opcode::S_CMP_EQ_I32:
             translator.S_CMP(ConditionOp::EQ, true, inst);
             break;
@@ -387,6 +399,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::V_SIN_F32:
             translator.V_SIN_F32(inst);
             break;
+        case Opcode::V_COS_F32:
+            translator.V_COS_F32(inst);
+            break;
         case Opcode::V_LOG_F32:
             translator.V_LOG_F32(inst);
             break;
@@ -522,6 +537,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::V_RNDNE_F32:
             translator.V_RNDNE_F32(inst);
             break;
+        case Opcode::V_BCNT_U32_B32:
+            translator.V_BCNT_U32_B32(inst);
+            break;
         case Opcode::S_NOP:
         case Opcode::S_CBRANCH_EXECZ:
         case Opcode::S_CBRANCH_SCC0:
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 6fd8e3f5..870cb3aa 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -104,6 +104,8 @@ public:
     void V_ASHRREV_I32(const GcnInst& inst);
     void V_MAD_U32_U24(const GcnInst& inst);
     void V_RNDNE_F32(const GcnInst& inst);
+    void V_BCNT_U32_B32(const GcnInst& inst);
+    void V_COS_F32(const GcnInst& inst);
 
     // Vector Memory
     void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 7484da57..0a3ec92e 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -25,8 +25,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
 }
 
 void Translator::V_MUL_F32(const GcnInst& inst) {
-    const IR::VectorReg dst_reg{inst.dst[0].code};
-    ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
+    SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
 }
 
 void Translator::V_CNDMASK_B32(const GcnInst& inst) {
@@ -372,4 +371,15 @@ void Translator::V_RNDNE_F32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.FPRoundEven(src0));
 }
 
+void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1));
+}
+
+void Translator::V_COS_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    SetDst(inst.dst[0], ir.FPCos(src0));
+}
+
 } // namespace Shader::Gcn
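
(Illustrative aside, not part of the patch.) In scalar terms, the IR built for the two newly translated VALU opcodes computes the following; a minimal sketch assuming 32-bit operands, with FPCos shown as a plain cosine:

    // Illustrative only; not part of the patch.
    #include <bit>
    #include <cmath>
    #include <cstdint>

    // V_BCNT_U32_B32 -> ir.IAdd(ir.BitCount(src0), src1)
    uint32_t BcntU32B32(uint32_t src0, uint32_t src1) {
        return static_cast<uint32_t>(std::popcount(src0)) + src1;
    }

    // V_COS_F32 -> ir.FPCos(src0)
    float CosF32(float src0) {
        return std::cos(src0);
    }
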
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 68b4fb11..36e816fb 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -148,6 +148,9 @@ private:
 } // Anonymous namespace
 
 SharpLocation TrackSharp(const IR::Inst* inst) {
+    while (inst->GetOpcode() == IR::Opcode::Phi) {
+        inst = inst->Arg(0).InstRecursive();
+    }
     if (inst->GetOpcode() == IR::Opcode::GetUserData) {
         return SharpLocation{
             .sgpr_base = u32(IR::ScalarReg::Max),
@@ -163,6 +166,12 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
     // Retrieve SGPR pair that holds sbase
    const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
    const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
+    while (sbase0->GetOpcode() == IR::Opcode::Phi) {
+        sbase0 = sbase0->Arg(0).TryInstRecursive();
+    }
+    while (sbase1->GetOpcode() == IR::Opcode::Phi) {
+        sbase1 = sbase1->Arg(0).TryInstRecursive();
+    }
     ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
                    sbase1->GetOpcode() == IR::Opcode::GetUserData,
                "Nested resource loads not supported");
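
(Illustrative aside, not part of the patch.) TrackSharp now skips SSA Phi nodes by always following their first argument, which assumes every incoming value ultimately resolves to the same sharp producer. A self-contained sketch of that walk over a hypothetical node type:

    // Illustrative only; mirrors the phi-skipping loops added above.
    struct Node {
        bool is_phi{};
        const Node* first_arg{};
    };

    const Node* SkipPhis(const Node* node) {
        while (node->is_phi) {
            node = node->first_arg; // follow the first incoming value, like Arg(0)
        }
        return node;
    }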