// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <deque>
#include <utility>

#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"

namespace Shader::Optimization {
namespace {

struct SharpLocation {
    u32 sgpr_base;
    u32 dword_offset;

    auto operator<=>(const SharpLocation&) const = default;
};

bool IsBufferInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadBufferF32:
    case IR::Opcode::LoadBufferF32x2:
    case IR::Opcode::LoadBufferF32x3:
    case IR::Opcode::LoadBufferF32x4:
    case IR::Opcode::LoadBufferU32:
    case IR::Opcode::ReadConstBuffer:
    case IR::Opcode::ReadConstBufferU32:
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
    case IR::Opcode::StoreBufferU32:
        return true;
    default:
        return false;
    }
}

IR::Type BufferDataType(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadBufferF32:
    case IR::Opcode::LoadBufferF32x2:
    case IR::Opcode::LoadBufferF32x3:
    case IR::Opcode::LoadBufferF32x4:
    case IR::Opcode::ReadConstBuffer:
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
        return IR::Type::F32;
    case IR::Opcode::LoadBufferU32:
    case IR::Opcode::ReadConstBufferU32:
    case IR::Opcode::StoreBufferU32:
        return IR::Type::U32;
    default:
        UNREACHABLE();
    }
}

bool IsBufferStore(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
    case IR::Opcode::StoreBufferU32:
        return true;
    default:
        return false;
    }
}

bool IsImageInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::ImageSampleExplicitLod:
    case IR::Opcode::ImageSampleImplicitLod:
    case IR::Opcode::ImageSampleDrefExplicitLod:
    case IR::Opcode::ImageSampleDrefImplicitLod:
    case IR::Opcode::ImageFetch:
    case IR::Opcode::ImageGather:
    case IR::Opcode::ImageGatherDref:
    case IR::Opcode::ImageQueryDimensions:
    case IR::Opcode::ImageQueryLod:
    case IR::Opcode::ImageGradient:
    case IR::Opcode::ImageRead:
    case IR::Opcode::ImageWrite:
        return true;
    default:
        return false;
    }
}

bool IsImageStorageInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::ImageWrite:
    case IR::Opcode::ImageRead:
        return true;
    default:
        return false;
    }
}
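// Collects the buffer, image and sampler resources referenced by the shader and assigns each
// unique one a binding index, deduplicating by sharp location (sgpr_base/dword_offset).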
class Descriptors {
public:
    explicit Descriptors(BufferResourceList& buffer_resources_,
                         ImageResourceList& image_resources_,
                         SamplerResourceList& sampler_resources_)
        : buffer_resources{buffer_resources_}, image_resources{image_resources_},
          sampler_resources{sampler_resources_} {}

    u32 Add(const BufferResource& desc) {
        const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
            return desc.sgpr_base == existing.sgpr_base &&
                   desc.dword_offset == existing.dword_offset;
        })};
        auto& buffer = buffer_resources[index];
        ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records);
        buffer.is_storage |= desc.is_storage;
        buffer.used_types |= desc.used_types;
        return index;
    }

    u32 Add(const ImageResource& desc) {
        const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
            return desc.sgpr_base == existing.sgpr_base &&
                   desc.dword_offset == existing.dword_offset && desc.type == existing.type &&
                   desc.is_storage == existing.is_storage;
        })};
        return index;
    }

    u32 Add(const SamplerResource& desc) {
        const u32 index{Add(sampler_resources, desc, [&desc](const auto& existing) {
            return desc.sgpr_base == existing.sgpr_base &&
                   desc.dword_offset == existing.dword_offset;
        })};
        return index;
    }

private:
    template <typename Descriptors, typename Descriptor, typename Func>
    static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
        const auto it{std::ranges::find_if(descriptors, pred)};
        if (it != descriptors.end()) {
            return static_cast<u32>(std::distance(descriptors.begin(), it));
        }
        descriptors.push_back(desc);
        return static_cast<u32>(descriptors.size()) - 1;
    }

    BufferResourceList& buffer_resources;
    ImageResourceList& image_resources;
    SamplerResourceList& sampler_resources;
};

} // Anonymous namespace

SharpLocation TrackSharp(const IR::Inst* inst) {
    while (inst->GetOpcode() == IR::Opcode::Phi) {
        inst = inst->Arg(0).InstRecursive();
    }
    if (inst->GetOpcode() == IR::Opcode::GetUserData) {
        return SharpLocation{
            .sgpr_base = u32(IR::ScalarReg::Max),
            .dword_offset = u32(inst->Arg(0).ScalarReg()),
        };
    }
    ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");

    // Retrieve offset from base.
    const u32 dword_offset = inst->Arg(1).U32();
    const IR::Inst* sgpr_base = inst->Arg(0).InstRecursive();

    // Retrieve SGPR pair that holds sbase.
    const IR::Inst* sbase0 = sgpr_base->Arg(0).InstRecursive();
    const IR::Inst* sbase1 = sgpr_base->Arg(1).InstRecursive();
    while (sbase0->GetOpcode() == IR::Opcode::Phi) {
        sbase0 = sbase0->Arg(0).TryInstRecursive();
    }
    while (sbase1->GetOpcode() == IR::Opcode::Phi) {
        sbase1 = sbase1->Arg(0).TryInstRecursive();
    }
    ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
                   sbase1->GetOpcode() == IR::Opcode::GetUserData,
               "Nested resource loads not supported");
    const IR::ScalarReg base = sbase0->Arg(0).ScalarReg();

    // Return retrieved location.
    return SharpLocation{
        .sgpr_base = u32(base),
        .dword_offset = dword_offset,
    };
}

void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                            Descriptors& descriptors) {
    IR::Inst* producer = inst.Arg(0).InstRecursive();
    const auto sharp = TrackSharp(producer);
    const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
    const u32 binding = descriptors.Add(BufferResource{
        .sgpr_base = sharp.sgpr_base,
        .dword_offset = sharp.dword_offset,
        .stride = buffer.GetStride(),
        .num_records = u32(buffer.num_records),
        .used_types = BufferDataType(inst),
        .is_storage = IsBufferStore(inst),
    });
    const auto inst_info = inst.Flags<IR::BufferInstInfo>();
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    // Replace handle with binding index in buffer resource list.
    inst.SetArg(0, ir.Imm32(binding));
    ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
    if (inst_info.is_typed) {
        ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
               inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
    }
    if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
        inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
        return;
    }

    // Calculate buffer address.
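    // The address is computed in dword units: the index operand is scaled by the buffer stride
    // expressed in dwords, while the instruction's immediate offset and any dynamic byte offset
    // are converted from bytes to dwords by dividing (shifting right) by sizeof(u32).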
    const u32 dword_stride = buffer.GetStrideElements(sizeof(u32));
    const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
    IR::U32 address = ir.Imm32(dword_offset);
    if (inst_info.index_enable && inst_info.offset_enable) {
        const IR::U32 offset{ir.CompositeExtract(inst.Arg(1), 1)};
        const IR::U32 index{ir.CompositeExtract(inst.Arg(1), 0)};
        address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
        address = ir.IAdd(address, ir.ShiftRightLogical(offset, ir.Imm32(2)));
    } else if (inst_info.index_enable) {
        const IR::U32 index{inst.Arg(1)};
        address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
    } else if (inst_info.offset_enable) {
        const IR::U32 offset{inst.Arg(1)};
        // Convert the byte offset to dwords, mirroring the combined index+offset case above.
        address = ir.IAdd(address, ir.ShiftRightLogical(offset, ir.Imm32(2)));
    }
    inst.SetArg(1, address);
}

IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
                         const IR::Value& z) {
    // We need to fix the x and y coordinates because the s and t coordinates have been scaled
    // and offset by 1.5 via v_madak_f32. We already force the scale value to be 1.0 when
    // handling v_cubema_f32, so here we subtract 1.5 to recover the original value.
    const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
    const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
    return ir.CompositeConstruct(x, y, z);
}

void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                           Descriptors& descriptors) {
    std::deque<IR::Inst*> insts{&inst};
    const auto& pred = [](auto opcode) -> bool {
        return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
                opcode == IR::Opcode::ReadConst ||               // IMAGE_LOAD (image only)
                opcode == IR::Opcode::GetUserData);
    };
    IR::Inst* producer{};
    while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
        for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
            const auto arg = producer->Arg(arg_idx);
            if (arg.TryInstRecursive()) {
                insts.push_back(arg.InstRecursive());
            }
        }
        insts.pop_front();
    }
    ASSERT(pred(producer->GetOpcode()));
    const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
        if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
            return std::make_pair(producer->Arg(0).InstRecursive(),
                                  producer->Arg(1).InstRecursive());
        }
        return std::make_pair(producer, nullptr);
    }();

    // Read image sharp.
    const auto tsharp = TrackSharp(tsharp_handle);
    const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
    const auto inst_info = inst.Flags<IR::TextureInstInfo>();
    u32 image_binding = descriptors.Add(ImageResource{
        .sgpr_base = tsharp.sgpr_base,
        .dword_offset = tsharp.dword_offset,
        .type = image.GetType(),
        .nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
        .is_storage = IsImageStorageInstruction(inst),
        .is_depth = bool(inst_info.is_depth),
    });

    // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions.
    if (ssharp_handle) {
        const auto ssharp = TrackSharp(ssharp_handle);
        const u32 sampler_binding = descriptors.Add(SamplerResource{
            .sgpr_base = ssharp.sgpr_base,
            .dword_offset = ssharp.dword_offset,
        });
        // Pack the sampler binding into the upper 16 bits of the handle.
        image_binding |= (sampler_binding << 16);
    }

    // Patch image handle.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(image_binding));

    // Now that we know the image type, adjust texture coordinate vector.
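    // The coordinates arrive as individual scalar operands; pack only as many as the image type
    // requires into a composite and keep the next operand around, since it may carry the LOD
    // clamp or explicit LOD value that is re-attached to its proper argument slot below.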
    const IR::Inst* body = inst.Arg(1).InstRecursive();
    const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
        switch (image.GetType()) {
        case AmdGpu::ImageType::Color1D: // x
            return {body->Arg(0), body->Arg(1)};
        case AmdGpu::ImageType::Color1DArray: // x, slice
            [[fallthrough]];
        case AmdGpu::ImageType::Color2D: // x, y
            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
        case AmdGpu::ImageType::Color2DArray: // x, y, slice
            [[fallthrough]];
        case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
            [[fallthrough]];
        case AmdGpu::ImageType::Color3D: // x, y, z
            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)),
                    body->Arg(3)};
        case AmdGpu::ImageType::Cube: // x, y, face
            return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
        default:
            UNREACHABLE_MSG("Unknown image type {}", image.GetType());
        }
    }();
    inst.SetArg(1, coords);

    if (inst_info.has_lod_clamp) {
        // Final argument contains lod_clamp.
        const u32 arg_pos = inst_info.is_depth ? 5 : 4;
        inst.SetArg(arg_pos, arg);
    }
    if (inst_info.explicit_lod) {
        ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
               inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod);
        const u32 pos = inst.GetOpcode() == IR::Opcode::ImageFetch ? 3 : 2;
        inst.SetArg(pos, arg);
    }
}

void ResourceTrackingPass(IR::Program& program) {
    // When loading data from an untyped buffer we don't know whether it is float or integer.
    // Most of the time it is float, so that is the default. This pass detects float buffer loads
    // combined with bitcasts and patches them to be integer loads.
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
                continue;
            }
            // Replace the bitcast with a typed buffer read.
            IR::Inst* const arg_inst{inst.Arg(0).TryInstRecursive()};
            if (!arg_inst) {
                continue;
            }
            const auto replace{[&](IR::Opcode new_opcode) {
                inst.ReplaceOpcode(new_opcode);
                inst.SetArg(0, arg_inst->Arg(0));
                inst.SetArg(1, arg_inst->Arg(1));
                inst.SetFlags(arg_inst->Flags<u32>());
                arg_inst->Invalidate();
            }};
            if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
                replace(IR::Opcode::ReadConstBufferU32);
            }
            if (arg_inst->GetOpcode() == IR::Opcode::LoadBufferF32) {
                replace(IR::Opcode::LoadBufferU32);
            }
        }
    }

    // Iterate resource instructions and patch them after finding the sharp.
    auto& info = program.info;
    Descriptors descriptors{info.buffers, info.images, info.samplers};
    for (IR::Block* const block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (IsBufferInstruction(inst)) {
                PatchBufferInstruction(*block, inst, info, descriptors);
                continue;
            }
            if (IsImageInstruction(inst)) {
                PatchImageInstruction(*block, inst, info, descriptors);
            }
        }
    }
}

} // namespace Shader::Optimization