shader_recompiler: Add LDEXP

This commit is contained in:
IndecisiveTurtle 2024-07-01 23:59:36 +03:00
parent af3bbc33e9
commit 91aed76920
16 changed files with 50 additions and 6 deletions

View File

@ -129,7 +129,11 @@ public:
const auto end = std::chrono::high_resolution_clock::now(); const auto end = std::chrono::high_resolution_clock::now();
const auto time = const auto time =
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
if (status == std::cv_status::timeout) {
*timeout = 0;
} else {
*timeout -= time; *timeout -= time;
}
return GetResult(status == std::cv_status::timeout); return GetResult(status == std::cv_status::timeout);
} }
}; };

View File

@ -98,6 +98,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
return ctx.OpExp2(ctx.F32[1], value); return ctx.OpExp2(ctx.F32[1], value);
} }
/// Emits an Ldexp operation through the emit context.
/// @param ctx   Emit context that provides the result type and the OpLdexp builder.
/// @param value Id of the floating-point operand.
/// @param exp   Id of the exponent operand.
/// @return Id produced by ctx.OpLdexp, typed as ctx.F32[1].
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
    // Same result type the neighbouring FP emitters (Exp2, Log2) pass to their ops.
    const Id result_type = ctx.F32[1];
    return ctx.OpLdexp(result_type, value, exp);
}
Id EmitFPLog2(EmitContext& ctx, Id value) { Id EmitFPLog2(EmitContext& ctx, Id value) {
return ctx.OpLog2(ctx.F32[1], value); return ctx.OpLog2(ctx.F32[1], value);
} }

View File

@ -172,6 +172,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
Id EmitFPSin(EmitContext& ctx, Id value); Id EmitFPSin(EmitContext& ctx, Id value);
Id EmitFPCos(EmitContext& ctx, Id value); Id EmitFPCos(EmitContext& ctx, Id value);
Id EmitFPExp2(EmitContext& ctx, Id value); Id EmitFPExp2(EmitContext& ctx, Id value);
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
Id EmitFPLog2(EmitContext& ctx, Id value); Id EmitFPLog2(EmitContext& ctx, Id value);
Id EmitFPRecip32(EmitContext& ctx, Id value); Id EmitFPRecip32(EmitContext& ctx, Id value);
Id EmitFPRecip64(EmitContext& ctx, Id value); Id EmitFPRecip64(EmitContext& ctx, Id value);

View File

@ -345,6 +345,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_BFREV_B32: case Opcode::V_BFREV_B32:
translator.V_BFREV_B32(inst); translator.V_BFREV_B32(inst);
break; break;
case Opcode::V_LDEXP_F32:
translator.V_LDEXP_F32(inst);
break;
case Opcode::V_FRACT_F32: case Opcode::V_FRACT_F32:
translator.V_FRACT_F32(inst); translator.V_FRACT_F32(inst);
break; break;

View File

@ -133,6 +133,7 @@ public:
void V_NOT_B32(const GcnInst& inst); void V_NOT_B32(const GcnInst& inst);
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst); void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
void V_BFREV_B32(const GcnInst& inst); void V_BFREV_B32(const GcnInst& inst);
void V_LDEXP_F32(const GcnInst& inst);
// Vector Memory // Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

View File

@ -502,4 +502,10 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(src0)); SetDst(inst.dst[0], ir.BitReverse(src0));
} }
void Translator::V_LDEXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -603,6 +603,10 @@ F32 IREmitter::FPExp2(const F32& value) {
return Inst<F32>(Opcode::FPExp2, value); return Inst<F32>(Opcode::FPExp2, value);
} }
/// Appends an FPLdexp instruction to the IR.
/// @param value Floating-point operand.
/// @param exp   32-bit unsigned operand carrying the exponent.
/// @return The F32 value produced by the emitted Opcode::FPLdexp instruction.
F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
    const F32 ldexp_result = Inst<F32>(Opcode::FPLdexp, value, exp);
    return ldexp_result;
}
F32 IREmitter::FPLog2(const F32& value) { F32 IREmitter::FPLog2(const F32& value) {
return Inst<F32>(Opcode::FPLog2, value); return Inst<F32>(Opcode::FPLog2, value);
} }

View File

@ -120,6 +120,7 @@ public:
[[nodiscard]] F32 FPSin(const F32& value); [[nodiscard]] F32 FPSin(const F32& value);
[[nodiscard]] F32 FPExp2(const F32& value); [[nodiscard]] F32 FPExp2(const F32& value);
[[nodiscard]] F32 FPLog2(const F32& value); [[nodiscard]] F32 FPLog2(const F32& value);
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
[[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecip(const F32F64& value);
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
[[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value);

View File

@ -148,6 +148,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
OPCODE(FPSqrt, F32, F32, ) OPCODE(FPSqrt, F32, F32, )
OPCODE(FPSin, F32, F32, ) OPCODE(FPSin, F32, F32, )
OPCODE(FPExp2, F32, F32, ) OPCODE(FPExp2, F32, F32, )
OPCODE(FPLdexp, F32, F32, U32, )
OPCODE(FPCos, F32, F32, ) OPCODE(FPCos, F32, F32, )
OPCODE(FPLog2, F32, F32, ) OPCODE(FPLog2, F32, F32, )
OPCODE(FPSaturate32, F32, F32, ) OPCODE(FPSaturate32, F32, F32, )

View File

@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi); regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
break; break;
} }
case PM4ItOpcode::IndexBufferSize: {
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
regs.num_indices = index_size->num_indices;
break;
}
case PM4ItOpcode::EventWrite: { case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header); // const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break; break;

View File

@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
u32 addr_hi; u32 addr_hi;
}; };
/// Packet body for PM4ItOpcode::IndexBufferSize.
/// The command processor reinterprets the raw packet header pointer as this
/// struct, so the field order and types must not change.
struct PM4CmdDrawIndexBufferSize {
PM4Type3Header header; ///< Type-3 packet header that starts the packet
u32 num_indices; ///< Index count; copied into regs.num_indices when the packet is processed
};
struct PM4CmdIndirectBuffer { struct PM4CmdIndirectBuffer {
PM4Type3Header header; PM4Type3Header header;
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned

View File

@ -392,6 +392,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Float) { num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16G16Sfloat; return vk::Format::eR16G16Sfloat;
} }
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR16G16B16A16Snorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
} }

View File

@ -203,6 +203,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
DumpShader(code, hash, stage, "bin"); DumpShader(code, hash, stage, "bin");
} }
if (hash == 0xcafe3773 || hash == 0xc6602df2) {
return nullptr;
}
block_pool.ReleaseContents(); block_pool.ReleaseContents();
inst_pool.ReleaseContents(); inst_pool.ReleaseContents();

View File

@ -321,12 +321,13 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
// Copy to the image. // Copy to the image.
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil ? vk::ImageAspectFlagBits::eDepth : aspect_mask;
const vk::BufferImageCopy image_copy = { const vk::BufferImageCopy image_copy = {
.bufferOffset = offset, .bufferOffset = offset,
.bufferRowLength = info.pitch, .bufferRowLength = info.pitch,
.bufferImageHeight = info.size.height, .bufferImageHeight = info.size.height,
.imageSubresource{ .imageSubresource{
.aspectMask = aspect_mask, .aspectMask = aspect,
.mipLevel = 0, .mipLevel = 0,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = 1, .layerCount = 1,

View File

@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
image_id = slot_images.insert(instance, scheduler, info, cpu_address); image_id = slot_images.insert(instance, scheduler, info, cpu_address);
RegisterImage(image_id); RegisterImage(image_id);
} else { } else {
image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0]; image_id = image_ids[0];
} }
RegisterMeta(info, image_id); RegisterMeta(info, image_id);
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CpuModified)) { if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
RefreshImage(image); RefreshImage(image);
TrackImage(image, image_id); TrackImage(image, image_id);
} }
@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) { const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint}; const ImageInfo info{buffer, hint};
const ImageId image_id = FindImage(info, buffer.Address(), false); const ImageId image_id = FindImage(info, buffer.Address());
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.flags &= ~ImageFlagBits::CpuModified; image.flags &= ~ImageFlagBits::CpuModified;