shader_recompiler: Add LDEXP
This commit is contained in:
parent
af3bbc33e9
commit
91aed76920
|
@ -129,7 +129,11 @@ public:
|
|||
const auto end = std::chrono::high_resolution_clock::now();
|
||||
const auto time =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
||||
*timeout -= time;
|
||||
if (status == std::cv_status::timeout) {
|
||||
*timeout = 0;
|
||||
} else {
|
||||
*timeout -= time;
|
||||
}
|
||||
return GetResult(status == std::cv_status::timeout);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -42,7 +42,7 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> {
|
|||
template <StringLiteral name, class F, F f>
|
||||
constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
|
||||
|
||||
// #define W(foo) wrapper<#foo, decltype(&foo), foo>
|
||||
//#define W(foo) wrapper<#foo, decltype(&foo), foo>
|
||||
#define W(foo) foo
|
||||
|
||||
#define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \
|
||||
|
|
|
@ -98,6 +98,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
|
|||
return ctx.OpExp2(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
|
||||
return ctx.OpLdexp(ctx.F32[1], value, exp);
|
||||
}
|
||||
|
||||
Id EmitFPLog2(EmitContext& ctx, Id value) {
|
||||
return ctx.OpLog2(ctx.F32[1], value);
|
||||
}
|
||||
|
|
|
@ -172,6 +172,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
|
|||
Id EmitFPSin(EmitContext& ctx, Id value);
|
||||
Id EmitFPCos(EmitContext& ctx, Id value);
|
||||
Id EmitFPExp2(EmitContext& ctx, Id value);
|
||||
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
|
||||
Id EmitFPLog2(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecip32(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecip64(EmitContext& ctx, Id value);
|
||||
|
|
|
@ -345,6 +345,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_BFREV_B32:
|
||||
translator.V_BFREV_B32(inst);
|
||||
break;
|
||||
case Opcode::V_LDEXP_F32:
|
||||
translator.V_LDEXP_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FRACT_F32:
|
||||
translator.V_FRACT_F32(inst);
|
||||
break;
|
||||
|
|
|
@ -133,6 +133,7 @@ public:
|
|||
void V_NOT_B32(const GcnInst& inst);
|
||||
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
|
||||
void V_BFREV_B32(const GcnInst& inst);
|
||||
void V_LDEXP_F32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
|
|
|
@ -502,4 +502,10 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.BitReverse(src0));
|
||||
}
|
||||
|
||||
void Translator::V_LDEXP_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -603,6 +603,10 @@ F32 IREmitter::FPExp2(const F32& value) {
|
|||
return Inst<F32>(Opcode::FPExp2, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
|
||||
return Inst<F32>(Opcode::FPLdexp, value, exp);
|
||||
}
|
||||
|
||||
F32 IREmitter::FPLog2(const F32& value) {
|
||||
return Inst<F32>(Opcode::FPLog2, value);
|
||||
}
|
||||
|
|
|
@ -120,6 +120,7 @@ public:
|
|||
[[nodiscard]] F32 FPSin(const F32& value);
|
||||
[[nodiscard]] F32 FPExp2(const F32& value);
|
||||
[[nodiscard]] F32 FPLog2(const F32& value);
|
||||
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
|
||||
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
|
||||
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
|
||||
[[nodiscard]] F32 FPSqrt(const F32& value);
|
||||
|
|
|
@ -148,6 +148,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
|
|||
OPCODE(FPSqrt, F32, F32, )
|
||||
OPCODE(FPSin, F32, F32, )
|
||||
OPCODE(FPExp2, F32, F32, )
|
||||
OPCODE(FPLdexp, F32, F32, U32, )
|
||||
OPCODE(FPCos, F32, F32, )
|
||||
OPCODE(FPLog2, F32, F32, )
|
||||
OPCODE(FPSaturate32, F32, F32, )
|
||||
|
|
|
@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexBufferSize: {
|
||||
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
|
||||
regs.num_indices = index_size->num_indices;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||
break;
|
||||
|
|
|
@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
|
|||
u32 addr_hi;
|
||||
};
|
||||
|
||||
struct PM4CmdDrawIndexBufferSize {
|
||||
PM4Type3Header header;
|
||||
u32 num_indices;
|
||||
};
|
||||
|
||||
struct PM4CmdIndirectBuffer {
|
||||
PM4Type3Header header;
|
||||
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned
|
||||
|
|
|
@ -392,6 +392,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR16G16Sfloat;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eR16G16B16A16Snorm;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||
}
|
||||
|
||||
|
|
|
@ -203,6 +203,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
DumpShader(code, hash, stage, "bin");
|
||||
}
|
||||
|
||||
if (hash == 0xcafe3773 || hash == 0xc6602df2) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
|
||||
|
|
|
@ -321,12 +321,13 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
|
|||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
|
||||
// Copy to the image.
|
||||
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil ? vk::ImageAspectFlagBits::eDepth : aspect_mask;
|
||||
const vk::BufferImageCopy image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = info.pitch,
|
||||
.bufferImageHeight = info.size.height,
|
||||
.imageSubresource{
|
||||
.aspectMask = aspect_mask,
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
|
|
|
@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
|
|||
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
|
||||
RegisterImage(image_id);
|
||||
} else {
|
||||
image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
|
||||
image_id = image_ids[0];
|
||||
}
|
||||
|
||||
RegisterMeta(info, image_id);
|
||||
|
||||
Image& image = slot_images[image_id];
|
||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
||||
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
|
||||
RefreshImage(image);
|
||||
TrackImage(image, image_id);
|
||||
}
|
||||
|
@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
|
|||
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint) {
|
||||
const ImageInfo info{buffer, hint};
|
||||
const ImageId image_id = FindImage(info, buffer.Address(), false);
|
||||
const ImageId image_id = FindImage(info, buffer.Address());
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
|
|
Loading…
Reference in New Issue