Recompiler: sampler patching (#236)

* recompiler: restored bfs in image instruction producers search

* recompiler: added pattern check for s# anisotropy modification

* added check if s# comes from constant load (e.g. EUD)
This commit is contained in:
psucien 2024-07-04 23:15:57 +02:00 committed by GitHub
parent 6ceab6dfac
commit dd97b517f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 79 additions and 13 deletions

View File

@ -182,6 +182,51 @@ private:
} // Anonymous namespace } // Anonymous namespace
std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
std::pair not_found{inst, false};
// Assuming S# is in UD s[12:15] and T# is in s[4:11]
// The next pattern:
// s_bfe_u32 s0, s7, $0x0008000c
// s_and_b32 s1, s12, $0xfffff1ff
// s_cmp_eq_u32 s0, 0
// s_cselect_b32 s0, s1, s12
// is used to disable anisotropy in the sampler if the sampled texture doesn't have mips
if (inst->GetOpcode() != IR::Opcode::SelectU32) {
return not_found;
}
// Select should be based on zero check
const auto* prod0 = inst->Arg(0).InstRecursive();
if (prod0->GetOpcode() != IR::Opcode::IEqual ||
!(prod0->Arg(1).IsImmediate() && prod0->Arg(1).U32() == 0u)) {
return not_found;
}
// The bits range is for lods
const auto* prod0_arg0 = prod0->Arg(0).InstRecursive();
if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract ||
prod0_arg0->Arg(1).InstRecursive()->Arg(0).U32() != 0x0008000cu) {
return not_found;
}
// Make sure mask is masking out anisotropy
const auto* prod1 = inst->Arg(1).InstRecursive();
if (prod1->GetOpcode() != IR::Opcode::BitwiseAnd32 || prod1->Arg(1).U32() != 0xfffff1ff) {
return not_found;
}
// We're working on the first dword of s#
const auto* prod2 = inst->Arg(2).InstRecursive();
if (prod2->GetOpcode() != IR::Opcode::GetUserData &&
prod2->GetOpcode() != IR::Opcode::ReadConst) {
return not_found;
}
return {prod2, true};
}
SharpLocation TrackSharp(const IR::Inst* inst) { SharpLocation TrackSharp(const IR::Inst* inst) {
while (inst->GetOpcode() == IR::Opcode::Phi) { while (inst->GetOpcode() == IR::Opcode::Phi) {
inst = inst->Arg(0).InstRecursive(); inst = inst->Arg(0).InstRecursive();
@ -329,15 +374,25 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
} }
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
IR::Inst* producer = inst.Arg(0).InstRecursive(); std::deque<IR::Inst*> insts{&inst};
while (producer->GetOpcode() == IR::Opcode::Phi) { const auto& pred = [](auto opcode) -> bool {
producer = producer->Arg(0).InstRecursive(); return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
opcode == IR::Opcode::GetUserData);
};
IR::Inst* producer{};
while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
const auto arg = producer->Arg(arg_idx);
if (arg.TryInstRecursive()) {
insts.push_back(arg.InstRecursive());
}
}
insts.pop_front();
} }
ASSERT(producer->GetOpcode() == ASSERT(pred(producer->GetOpcode()));
IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler) auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
producer->GetOpcode() == IR::Opcode::GetUserData);
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) { if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
return std::make_pair(producer->Arg(0).InstRecursive(), return std::make_pair(producer->Arg(0).InstRecursive(),
producer->Arg(1).InstRecursive()); producer->Arg(1).InstRecursive());
@ -360,10 +415,13 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
if (ssharp_handle) { if (ssharp_handle) {
const auto ssharp = TrackSharp(ssharp_handle); const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud);
const u32 sampler_binding = descriptors.Add(SamplerResource{ const u32 sampler_binding = descriptors.Add(SamplerResource{
.sgpr_base = ssharp.sgpr_base, .sgpr_base = ssharp.sgpr_base,
.dword_offset = ssharp.dword_offset, .dword_offset = ssharp.dword_offset,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
}); });
image_binding |= (sampler_binding << 16); image_binding |= (sampler_binding << 16);
} }

View File

@ -96,6 +96,8 @@ using ImageResourceList = boost::container::static_vector<ImageResource, 16>;
struct SamplerResource { struct SamplerResource {
u32 sgpr_base; u32 sgpr_base;
u32 dword_offset; u32 dword_offset;
u32 associated_image : 4;
u32 disable_aniso : 1;
}; };
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>; using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;

View File

@ -348,9 +348,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
} }
} }
boost::container::static_vector<AmdGpu::Image, 16> tsharps;
for (const auto& image_desc : stage.images) { for (const auto& image_desc : stage.images) {
const auto tsharp = const auto& tsharp = tsharps.emplace_back(
stage.ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset); stage.ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset));
const auto& image_view = texture_cache.FindImageView(tsharp, image_desc.is_storage); const auto& image_view = texture_cache.FindImageView(tsharp, image_desc.is_storage);
const auto& image = texture_cache.GetImage(image_view.image_id); const auto& image = texture_cache.GetImage(image_view.image_id);
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
@ -369,8 +370,13 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
} }
} }
for (const auto& sampler : stage.samplers) { for (const auto& sampler : stage.samplers) {
const auto ssharp = auto ssharp = stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset); if (sampler.disable_aniso) {
const auto& tsharp = tsharps[sampler.associated_image];
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One);
}
}
const auto vk_sampler = texture_cache.GetSampler(ssharp); const auto vk_sampler = texture_cache.GetSampler(ssharp);
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
set_writes.push_back({ set_writes.push_back({