shader_recompiler: Add LDEXP
This commit is contained in:
parent
af3bbc33e9
commit
91aed76920
|
@ -129,7 +129,11 @@ public:
|
||||||
const auto end = std::chrono::high_resolution_clock::now();
|
const auto end = std::chrono::high_resolution_clock::now();
|
||||||
const auto time =
|
const auto time =
|
||||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
||||||
|
if (status == std::cv_status::timeout) {
|
||||||
|
*timeout = 0;
|
||||||
|
} else {
|
||||||
*timeout -= time;
|
*timeout -= time;
|
||||||
|
}
|
||||||
return GetResult(status == std::cv_status::timeout);
|
return GetResult(status == std::cv_status::timeout);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -42,7 +42,7 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> {
|
||||||
template <StringLiteral name, class F, F f>
|
template <StringLiteral name, class F, F f>
|
||||||
constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
|
constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
|
||||||
|
|
||||||
// #define W(foo) wrapper<#foo, decltype(&foo), foo>
|
//#define W(foo) wrapper<#foo, decltype(&foo), foo>
|
||||||
#define W(foo) foo
|
#define W(foo) foo
|
||||||
|
|
||||||
#define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \
|
#define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \
|
||||||
|
|
|
@ -98,6 +98,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
|
||||||
return ctx.OpExp2(ctx.F32[1], value);
|
return ctx.OpExp2(ctx.F32[1], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the backend instruction for the IR FPLdexp opcode: forwards `value`
// and `exp` to ctx.OpLdexp with result type ctx.F32[1] and returns the Id of
// the emitted instruction.
// NOTE(review): presumably OpLdexp maps to GLSL.std.450 Ldexp
// (value * 2^exp) — confirm against the SPIR-V emitter.
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
|
||||||
|
return ctx.OpLdexp(ctx.F32[1], value, exp);
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitFPLog2(EmitContext& ctx, Id value) {
|
Id EmitFPLog2(EmitContext& ctx, Id value) {
|
||||||
return ctx.OpLog2(ctx.F32[1], value);
|
return ctx.OpLog2(ctx.F32[1], value);
|
||||||
}
|
}
|
||||||
|
|
|
@ -172,6 +172,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
|
||||||
Id EmitFPSin(EmitContext& ctx, Id value);
|
Id EmitFPSin(EmitContext& ctx, Id value);
|
||||||
Id EmitFPCos(EmitContext& ctx, Id value);
|
Id EmitFPCos(EmitContext& ctx, Id value);
|
||||||
Id EmitFPExp2(EmitContext& ctx, Id value);
|
Id EmitFPExp2(EmitContext& ctx, Id value);
|
||||||
|
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
|
||||||
Id EmitFPLog2(EmitContext& ctx, Id value);
|
Id EmitFPLog2(EmitContext& ctx, Id value);
|
||||||
Id EmitFPRecip32(EmitContext& ctx, Id value);
|
Id EmitFPRecip32(EmitContext& ctx, Id value);
|
||||||
Id EmitFPRecip64(EmitContext& ctx, Id value);
|
Id EmitFPRecip64(EmitContext& ctx, Id value);
|
||||||
|
|
|
@ -345,6 +345,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::V_BFREV_B32:
|
case Opcode::V_BFREV_B32:
|
||||||
translator.V_BFREV_B32(inst);
|
translator.V_BFREV_B32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_LDEXP_F32:
|
||||||
|
translator.V_LDEXP_F32(inst);
|
||||||
|
break;
|
||||||
case Opcode::V_FRACT_F32:
|
case Opcode::V_FRACT_F32:
|
||||||
translator.V_FRACT_F32(inst);
|
translator.V_FRACT_F32(inst);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -133,6 +133,7 @@ public:
|
||||||
void V_NOT_B32(const GcnInst& inst);
|
void V_NOT_B32(const GcnInst& inst);
|
||||||
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
|
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
|
||||||
void V_BFREV_B32(const GcnInst& inst);
|
void V_BFREV_B32(const GcnInst& inst);
|
||||||
|
void V_LDEXP_F32(const GcnInst& inst);
|
||||||
|
|
||||||
// Vector Memory
|
// Vector Memory
|
||||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||||
|
|
|
@ -502,4 +502,10 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.BitReverse(src0));
|
SetDst(inst.dst[0], ir.BitReverse(src0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Translates the V_LDEXP_F32 instruction: reads the two source operands,
// builds an FPLdexp IR instruction from them and stores the result in the
// instruction's first destination operand.
void Translator::V_LDEXP_F32(const GcnInst& inst) {
|
||||||
|
// First operand is fetched as a float (second GetSrc argument is `true`;
// presumably the "force float" flag — confirm against GetSrc's declaration).
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
|
// Second operand (the exponent) is fetched as a 32-bit integer value.
// NOTE(review): the GCN ISA describes this exponent as a signed integer,
// while it is typed U32 here — confirm the backend treats the bits as signed.
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -603,6 +603,10 @@ F32 IREmitter::FPExp2(const F32& value) {
|
||||||
return Inst<F32>(Opcode::FPExp2, value);
|
return Inst<F32>(Opcode::FPExp2, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends an Opcode::FPLdexp IR instruction taking `value` (F32) and `exp`
// (U32) as operands and returns its F32 result.
F32 IREmitter::FPLdexp(const F32& value, const U32& exp) {
|
||||||
|
return Inst<F32>(Opcode::FPLdexp, value, exp);
|
||||||
|
}
|
||||||
|
|
||||||
F32 IREmitter::FPLog2(const F32& value) {
|
F32 IREmitter::FPLog2(const F32& value) {
|
||||||
return Inst<F32>(Opcode::FPLog2, value);
|
return Inst<F32>(Opcode::FPLog2, value);
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,6 +120,7 @@ public:
|
||||||
[[nodiscard]] F32 FPSin(const F32& value);
|
[[nodiscard]] F32 FPSin(const F32& value);
|
||||||
[[nodiscard]] F32 FPExp2(const F32& value);
|
[[nodiscard]] F32 FPExp2(const F32& value);
|
||||||
[[nodiscard]] F32 FPLog2(const F32& value);
|
[[nodiscard]] F32 FPLog2(const F32& value);
|
||||||
|
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
|
||||||
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
|
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
|
||||||
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
|
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
|
||||||
[[nodiscard]] F32 FPSqrt(const F32& value);
|
[[nodiscard]] F32 FPSqrt(const F32& value);
|
||||||
|
|
|
@ -148,6 +148,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
|
||||||
OPCODE(FPSqrt, F32, F32, )
|
OPCODE(FPSqrt, F32, F32, )
|
||||||
OPCODE(FPSin, F32, F32, )
|
OPCODE(FPSin, F32, F32, )
|
||||||
OPCODE(FPExp2, F32, F32, )
|
OPCODE(FPExp2, F32, F32, )
|
||||||
|
OPCODE(FPLdexp, F32, F32, U32, )
|
||||||
OPCODE(FPCos, F32, F32, )
|
OPCODE(FPCos, F32, F32, )
|
||||||
OPCODE(FPLog2, F32, F32, )
|
OPCODE(FPLog2, F32, F32, )
|
||||||
OPCODE(FPSaturate32, F32, F32, )
|
OPCODE(FPSaturate32, F32, F32, )
|
||||||
|
|
|
@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::IndexBufferSize: {
|
||||||
|
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
|
||||||
|
regs.num_indices = index_size->num_indices;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::EventWrite: {
|
case PM4ItOpcode::EventWrite: {
|
||||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
|
||||||
u32 addr_hi;
|
u32 addr_hi;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Layout of the PM4 packet handled by the PM4ItOpcode::IndexBufferSize case
// in Liverpool::ProcessGraphics, which reinterprets the packet header pointer
// as this struct and copies num_indices into regs.num_indices.
struct PM4CmdDrawIndexBufferSize {
|
||||||
|
PM4Type3Header header; ///< Packet header; the struct is overlaid starting at this header
|
||||||
|
u32 num_indices; ///< Index count carried by the packet (stored to regs.num_indices)
|
||||||
|
};
|
||||||
|
|
||||||
struct PM4CmdIndirectBuffer {
|
struct PM4CmdIndirectBuffer {
|
||||||
PM4Type3Header header;
|
PM4Type3Header header;
|
||||||
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned
|
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned
|
||||||
|
|
|
@ -392,6 +392,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
num_format == AmdGpu::NumberFormat::Float) {
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
return vk::Format::eR16G16Sfloat;
|
return vk::Format::eR16G16Sfloat;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Snorm) {
|
||||||
|
return vk::Format::eR16G16B16A16Snorm;
|
||||||
|
}
|
||||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -203,6 +203,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
||||||
DumpShader(code, hash, stage, "bin");
|
DumpShader(code, hash, stage, "bin");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hash == 0xcafe3773 || hash == 0xc6602df2) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
block_pool.ReleaseContents();
|
block_pool.ReleaseContents();
|
||||||
inst_pool.ReleaseContents();
|
inst_pool.ReleaseContents();
|
||||||
|
|
||||||
|
|
|
@ -321,12 +321,13 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||||
|
|
||||||
// Copy to the image.
|
// Copy to the image.
|
||||||
|
const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil ? vk::ImageAspectFlagBits::eDepth : aspect_mask;
|
||||||
const vk::BufferImageCopy image_copy = {
|
const vk::BufferImageCopy image_copy = {
|
||||||
.bufferOffset = offset,
|
.bufferOffset = offset,
|
||||||
.bufferRowLength = info.pitch,
|
.bufferRowLength = info.pitch,
|
||||||
.bufferImageHeight = info.size.height,
|
.bufferImageHeight = info.size.height,
|
||||||
.imageSubresource{
|
.imageSubresource{
|
||||||
.aspectMask = aspect_mask,
|
.aspectMask = aspect,
|
||||||
.mipLevel = 0,
|
.mipLevel = 0,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = 1,
|
.layerCount = 1,
|
||||||
|
|
|
@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
|
||||||
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
|
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
|
||||||
RegisterImage(image_id);
|
RegisterImage(image_id);
|
||||||
} else {
|
} else {
|
||||||
image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
|
image_id = image_ids[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
RegisterMeta(info, image_id);
|
RegisterMeta(info, image_id);
|
||||||
|
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
if (True(image.flags & ImageFlagBits::CpuModified)) {
|
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
|
||||||
RefreshImage(image);
|
RefreshImage(image);
|
||||||
TrackImage(image, image_id);
|
TrackImage(image, image_id);
|
||||||
}
|
}
|
||||||
|
@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
|
||||||
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint) {
|
const AmdGpu::Liverpool::CbDbExtent& hint) {
|
||||||
const ImageInfo info{buffer, hint};
|
const ImageInfo info{buffer, hint};
|
||||||
const ImageId image_id = FindImage(info, buffer.Address(), false);
|
const ImageId image_id = FindImage(info, buffer.Address());
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
image.flags &= ~ImageFlagBits::CpuModified;
|
image.flags &= ~ImageFlagBits::CpuModified;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue