shader_recompiler: Add LDEXP

This commit is contained in:
IndecisiveTurtle 2024-07-01 23:59:36 +03:00
parent af3bbc33e9
commit 91aed76920
16 changed files with 50 additions and 6 deletions

View File

@ -129,7 +129,11 @@ public:
const auto end = std::chrono::high_resolution_clock::now();
const auto time =
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
*timeout -= time;
if (status == std::cv_status::timeout) {
*timeout = 0;
} else {
*timeout -= time;
}
return GetResult(status == std::cv_status::timeout);
}
};

View File

@ -42,7 +42,7 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> {
template <StringLiteral name, class F, F f>
constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
// #define W(foo) wrapper<#foo, decltype(&foo), foo>
//#define W(foo) wrapper<#foo, decltype(&foo), foo>
#define W(foo) foo
#define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \

View File

@ -98,6 +98,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
return ctx.OpExp2(ctx.F32[1], value);
}
/// Emits the SPIR-V instruction for the FPLdexp IR opcode.
/// Forwards `value` and `exp` to ctx.OpLdexp, using ctx.F32[1] as the result type.
/// NOTE(review): presumably this maps to GLSL.std.450 Ldexp (value * 2^exp) - confirm.
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
    const auto result_type = ctx.F32[1];
    return ctx.OpLdexp(result_type, value, exp);
}
/// Emits the SPIR-V instruction for the FPLog2 IR opcode.
/// Forwards `value` to ctx.OpLog2, using ctx.F32[1] as the result type.
Id EmitFPLog2(EmitContext& ctx, Id value) {
    const auto result_type = ctx.F32[1];
    return ctx.OpLog2(result_type, value);
}

View File

@ -172,6 +172,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
Id EmitFPSin(EmitContext& ctx, Id value);
Id EmitFPCos(EmitContext& ctx, Id value);
Id EmitFPExp2(EmitContext& ctx, Id value);
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
Id EmitFPLog2(EmitContext& ctx, Id value);
Id EmitFPRecip32(EmitContext& ctx, Id value);
Id EmitFPRecip64(EmitContext& ctx, Id value);

View File

@ -345,6 +345,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_BFREV_B32:
translator.V_BFREV_B32(inst);
break;
case Opcode::V_LDEXP_F32:
translator.V_LDEXP_F32(inst);
break;
case Opcode::V_FRACT_F32:
translator.V_FRACT_F32(inst);
break;

View File

@ -133,6 +133,7 @@ public:
void V_NOT_B32(const GcnInst& inst);
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
void V_BFREV_B32(const GcnInst& inst);
void V_LDEXP_F32(const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

View File

@ -502,4 +502,10 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(src0));
}
void Translator::V_LDEXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}
} // namespace Shader::Gcn

View File

@ -603,6 +603,10 @@ F32 IREmitter::FPExp2(const F32& value) {
return Inst<F32>(Opcode::FPExp2, value);
}
/// Appends an FPLdexp instruction taking a float `value` and an unsigned `exp`,
/// and returns its F32 result.
F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
    auto result = Inst<F32>(Opcode::FPLdexp, value, exp);
    return result;
}
/// Appends an FPLog2 instruction operating on `value` and returns its F32 result.
F32 IREmitter::FPLog2(const F32& value) {
    auto result = Inst<F32>(Opcode::FPLog2, value);
    return result;
}

View File

@ -120,6 +120,7 @@ public:
[[nodiscard]] F32 FPSin(const F32& value);
[[nodiscard]] F32 FPExp2(const F32& value);
[[nodiscard]] F32 FPLog2(const F32& value);
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
[[nodiscard]] F32 FPSqrt(const F32& value);

View File

@ -148,6 +148,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
OPCODE(FPSqrt, F32, F32, )
OPCODE(FPSin, F32, F32, )
OPCODE(FPExp2, F32, F32, )
OPCODE(FPLdexp, F32, F32, U32, )
OPCODE(FPCos, F32, F32, )
OPCODE(FPLog2, F32, F32, )
OPCODE(FPSaturate32, F32, F32, )

View File

@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
break;
}
case PM4ItOpcode::IndexBufferSize: {
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
regs.num_indices = index_size->num_indices;
break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;

View File

@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
u32 addr_hi;
};
/// Packet body for PM4ItOpcode::IndexBufferSize.
/// The command processor reinterprets the raw packet header pointer as this struct,
/// so the field order and sizes must match the packet layout exactly.
struct PM4CmdDrawIndexBufferSize {
PM4Type3Header header; ///< Type-3 packet header that precedes the payload
u32 num_indices;       ///< Index count; copied into regs.num_indices when processed
};
struct PM4CmdIndirectBuffer {
PM4Type3Header header;
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned

View File

@ -392,6 +392,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16G16Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR16G16B16A16Snorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View File

@ -203,6 +203,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
DumpShader(code, hash, stage, "bin");
}
if (hash == 0xcafe3773 || hash == 0xc6602df2) {
return nullptr;
}
block_pool.ReleaseContents();
inst_pool.ReleaseContents();

View File

@ -321,12 +321,13 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
// Copy to the image.
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil ? vk::ImageAspectFlagBits::eDepth : aspect_mask;
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = info.pitch,
.bufferImageHeight = info.size.height,
.imageSubresource{
.aspectMask = aspect_mask,
.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,

View File

@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
RegisterImage(image_id);
} else {
image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
image_id = image_ids[0];
}
RegisterMeta(info, image_id);
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CpuModified)) {
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
RefreshImage(image);
TrackImage(image, image_id);
}
@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
const ImageId image_id = FindImage(info, buffer.Address(), false);
const ImageId image_id = FindImage(info, buffer.Address());
Image& image = slot_images[image_id];
image.flags &= ~ImageFlagBits::CpuModified;