From a603bc7d88e663d8ffd140a5916cd0612001c42c Mon Sep 17 00:00:00 2001
From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
Date: Mon, 1 Jul 2024 22:42:45 +0300
Subject: [PATCH] shader_recompiler: More instructions

Translate V_XOR_B32, V_BFE_I32, V_CVT_F32_F16, V_CVT_F32_UBYTE0-3,
V_BFREV_B32 and IMAGE_GET_LOD, and implement EmitImageQueryLod in the
SPIR-V backend. V_XOR_B32 and V_BFE_I32 reuse the V_OR_B32 / V_BFE_U32
translators through a new boolean parameter.

V_CVT_F32_UBYTE0-3 get concrete operand types (Uint32 -> Float32) in the
VOP1 format table, ConvertUToF learns a 16-bit source (ConvertF32U16),
UConvert is widened to U16U32U64, and NID Op8TBGY5KHg
(posix_pthread_cond_wait) is additionally registered under libkernel.

V_CVT_F32_F16 unpacks the low half of src0 as a half float; it is not an
integer-to-float conversion.
---
 .../libraries/kernel/thread_management.cpp    |  1 +
 .../backend/spirv/emit_spirv_image.cpp        |  8 +++--
 .../backend/spirv/emit_spirv_instructions.h   |  2 +-
 src/shader_recompiler/frontend/format.cpp     | 16 +++++-----
 .../frontend/translate/translate.cpp          | 31 +++++++++++++++++--
 .../frontend/translate/translate.h            |  8 +++--
 .../frontend/translate/vector_alu.cpp         | 25 ++++++++++++---
 .../frontend/translate/vector_memory.cpp      | 14 +++++++++
 src/shader_recompiler/ir/ir_emitter.cpp       |  4 ++-
 src/shader_recompiler/ir/ir_emitter.h         |  2 +-
 src/shader_recompiler/ir/opcodes.inc          |  1 +
 src/shader_recompiler/ir/value.h              |  1 +
 12 files changed, 92 insertions(+), 21 deletions(-)

diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp
index c5db12a7..2b526eed 100644
--- a/src/core/libraries/kernel/thread_management.cpp
+++ b/src/core/libraries/kernel/thread_management.cpp
@@ -1357,6 +1357,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
     LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
     LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
+    LIB_FUNCTION("Op8TBGY5KHg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_wait);
     LIB_FUNCTION("Op8TBGY5KHg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_wait);
     LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
     LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 3e4cf019..7a54f31c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -113,8 +113,12 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
     }
 }
 
-Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords) {
+    const auto& texture = ctx.images[handle & 0xFFFF];
+    const Id image = ctx.OpLoad(texture.image_type, texture.id);
+    const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
+    const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
+    return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords);
 }
 
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 316c17cb..246d7c44 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -349,7 +349,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
 Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
                   Id ms);
 Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
-Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                      Id derivatives, const IR::Value& offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp
index 379ed85f..46a40a6e 100644
--- a/src/shader_recompiler/frontend/format.cpp
+++ b/src/shader_recompiler/frontend/format.cpp
@@ -1826,17 +1826,17 @@ constexpr std::array InstructionFormatVOP1 = {{
     {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32,
      ScalarType::Float64},
     // 17 = V_CVT_F32_UBYTE0
-    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 18 = V_CVT_F32_UBYTE1
-    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 19 = V_CVT_F32_UBYTE2
-    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 20 = V_CVT_F32_UBYTE3
-    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined,
-     ScalarType::Undefined},
+    {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32,
+     ScalarType::Float32},
     // 21 = V_CVT_U32_F64
     {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Uint32},
     // 22 = V_CVT_F64_U32
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 27d4691d..407ee399 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -268,7 +268,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
             translator.V_AND_B32(inst);
             break;
         case Opcode::V_OR_B32:
-            translator.V_OR_B32(inst);
+            translator.V_OR_B32(false, inst);
+            break;
+        case Opcode::V_XOR_B32:
+            translator.V_OR_B32(true, inst);
             break;
         case Opcode::V_LSHLREV_B32:
             translator.V_LSHLREV_B32(inst);
@@ -324,6 +327,24 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::V_CVT_PKRTZ_F16_F32:
             translator.V_CVT_PKRTZ_F16_F32(inst);
             break;
+        case Opcode::V_CVT_F32_F16:
+            translator.V_CVT_F32_F16(inst);
+            break;
+        case Opcode::V_CVT_F32_UBYTE0:
+            translator.V_CVT_F32_UBYTE(0, inst);
+            break;
+        case Opcode::V_CVT_F32_UBYTE1:
+            translator.V_CVT_F32_UBYTE(1, inst);
+            break;
+        case Opcode::V_CVT_F32_UBYTE2:
+            translator.V_CVT_F32_UBYTE(2, inst);
+            break;
+        case Opcode::V_CVT_F32_UBYTE3:
+            translator.V_CVT_F32_UBYTE(3, inst);
+            break;
+        case Opcode::V_BFREV_B32:
+            translator.V_BFREV_B32(inst);
+            break;
         case Opcode::V_FRACT_F32:
             translator.V_FRACT_F32(inst);
             break;
@@ -355,6 +376,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::IMAGE_SAMPLE_L:
             translator.IMAGE_SAMPLE(inst);
             break;
+        case Opcode::IMAGE_GET_LOD:
+            translator.IMAGE_GET_LOD(inst);
+            break;
         case Opcode::IMAGE_GATHER4_C:
             translator.IMAGE_GATHER(inst);
             break;
@@ -682,7 +706,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
             translator.V_SAD_U32(inst);
             break;
         case Opcode::V_BFE_U32:
-            translator.V_BFE_U32(inst);
+            translator.V_BFE_U32(false, inst);
+            break;
+        case Opcode::V_BFE_I32:
+            translator.V_BFE_U32(true, inst);
             break;
         case Opcode::V_MAD_I32_I24:
             translator.V_MAD_I32_I24(inst);
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index ef5ff8b7..1145de59 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -71,9 +71,10 @@ public:
     void V_SAD(const GcnInst& inst);
     void V_MAC_F32(const GcnInst& inst);
     void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
+    void V_CVT_F32_F16(const GcnInst& inst);
     void V_MUL_F32(const GcnInst& inst);
     void V_CNDMASK_B32(const GcnInst& inst);
-    void V_OR_B32(const GcnInst& inst);
+    void V_OR_B32(bool is_xor, const GcnInst& inst);
     void V_AND_B32(const GcnInst& inst);
     void V_LSHLREV_B32(const GcnInst& inst);
     void V_ADD_I32(const GcnInst& inst);
@@ -110,7 +111,7 @@ public:
     void V_LSHRREV_B32(const GcnInst& inst);
     void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
     void V_SAD_U32(const GcnInst& inst);
-    void V_BFE_U32(const GcnInst& inst);
+    void V_BFE_U32(bool is_signed, const GcnInst& inst);
     void V_MAD_I32_I24(const GcnInst& inst);
     void V_MUL_I32_I24(const GcnInst& inst);
     void V_SUB_I32(const GcnInst& inst);
@@ -130,6 +131,8 @@ public:
     void V_CMP_NE_U64(const GcnInst& inst);
     void V_BFI_B32(const GcnInst& inst);
     void V_NOT_B32(const GcnInst& inst);
+    void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
+    void V_BFREV_B32(const GcnInst& inst);
 
     // Vector Memory
     void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
@@ -149,6 +152,7 @@ public:
     void IMAGE_GATHER(const GcnInst& inst);
     void IMAGE_STORE(const GcnInst& inst);
     void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
+    void IMAGE_GET_LOD(const GcnInst& inst);
 
     // Export
     void EXP(const GcnInst& inst);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 1dbb9062..72b2d76a 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -26,6 +26,12 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
     ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
 }
 
+void Translator::V_CVT_F32_F16(const GcnInst& inst) {
+    const IR::U32 src0 = GetSrc(inst.src[0]);
+    // src0[15:0] is an f16 bit pattern; widen it to f32 instead of converting an integer.
+    SetDst(inst.dst[0], IR::F32{ir.CompositeExtract(ir.UnpackHalf2x16(src0), 0)});
+}
+
 void Translator::V_MUL_F32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
 }
@@ -54,11 +60,11 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
     ir.SetVectorReg(dst_reg, IR::U32F32{result});
 }
 
-void Translator::V_OR_B32(const GcnInst& inst) {
+void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
     const IR::VectorReg dst_reg{inst.dst[0].code};
-    ir.SetVectorReg(dst_reg, ir.BitwiseOr(src0, src1));
+    ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1));
 }
 
 void Translator::V_AND_B32(const GcnInst& inst) {
@@ -345,11 +351,11 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2));
 }
 
-void Translator::V_BFE_U32(const GcnInst& inst) {
+void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
     const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
-    SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2));
+    SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
 }
 
 void Translator::V_MAD_I32_I24(const GcnInst& inst) {
@@ -486,4 +492,15 @@ void Translator::V_NOT_B32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.BitwiseNot(src0));
 }
 
+void Translator::V_CVT_F32_UBYTE(u32 index, const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 byte = ir.BitFieldExtract(src0, ir.Imm32(8 * index), ir.Imm32(8));
+    SetDst(inst.dst[0], ir.ConvertUToF(32, 32, byte));
+}
+
+void Translator::V_BFREV_B32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    SetDst(inst.dst[0], ir.BitReverse(src0));
+}
+
 } // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 71ca7c2e..f12b4e2f 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -307,4 +307,18 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
     ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
 }
 
+void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
+    IR::VectorReg dst_reg{inst.dst[0].code};
+    IR::VectorReg addr_reg{inst.src[0].code};
+    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
+
+    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
+    const IR::Value body = ir.CompositeConstruct(
+        ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
+        ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
+    const IR::Value lod = ir.ImageQueryLod(handle, body, {});
+    ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)});
+    ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
+}
+
 } // namespace Shader::Gcn
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index aa95f239..09bb3580 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -1119,6 +1119,8 @@ F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Val
     switch (dest_bitsize) {
     case 32:
         switch (src_bitsize) {
+        case 16:
+            return Inst<F32>(Opcode::ConvertF32U16, value);
         case 32:
             return Inst<F32>(Opcode::ConvertF32U32, value);
         }
@@ -1139,7 +1141,7 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
                      : ConvertUToF(dest_bitsize, src_bitsize, value);
 }
 
-U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
+U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 917de458..cf74afc0 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -196,7 +196,7 @@ public:
     [[nodiscard]] F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
                                      const Value& value);
 
-    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] U16U32U64 UConvert(size_t result_bitsize, const U16U32U64& value);
     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
 
     [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 18c0ce0b..a9b895d2 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -257,6 +257,7 @@ OPCODE(ConvertF32S32,                                       F32,            U32,
 OPCODE(ConvertF32U32,                                       F32,            U32,                                                            )
 OPCODE(ConvertF64S32,                                       F64,            U32,                                                            )
 OPCODE(ConvertF64U32,                                       F64,            U32,                                                            )
+OPCODE(ConvertF32U16,                                       F32,            U16,                                                            )
 
 // Image operations
 OPCODE(ImageSampleImplicitLod,                              F32x4,          Opaque,         Opaque,         Opaque,         Opaque,         )
diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h
index 8c97f495..a43c17f5 100644
--- a/src/shader_recompiler/ir/value.h
+++ b/src/shader_recompiler/ir/value.h
@@ -221,6 +221,7 @@ using F32 = TypedValue<Type::F32>;
 using F64 = TypedValue<Type::F64>;
 using U32F32 = TypedValue<Type::U32 | Type::F32>;
 using U32U64 = TypedValue<Type::U32 | Type::U64>;
+using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
 using F32F64 = TypedValue<Type::F32 | Type::F64>;
 using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
 using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;