diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index ba8d6300..bfa6a68d 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -129,7 +129,11 @@ public: const auto end = std::chrono::high_resolution_clock::now(); const auto time = std::chrono::duration_cast(end - start).count(); - *timeout -= time; + if (status == std::cv_status::timeout) { + *timeout = 0; + } else { + *timeout -= time; + } return GetResult(status == std::cv_status::timeout); } }; diff --git a/src/core/libraries/libs.h b/src/core/libraries/libs.h index 7cad7f8b..27072f1f 100644 --- a/src/core/libraries/libs.h +++ b/src/core/libraries/libs.h @@ -42,7 +42,7 @@ struct wrapper_impl { template constexpr auto wrapper = wrapper_impl::wrap; -// #define W(foo) wrapper<#foo, decltype(&foo), foo> +//#define W(foo) wrapper<#foo, decltype(&foo), foo> #define W(foo) foo #define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \ diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e56eb916..18d5f35a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -98,6 +98,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) { return ctx.OpExp2(ctx.F32[1], value); } +Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) { + return ctx.OpLdexp(ctx.F32[1], value, exp); +} + Id EmitFPLog2(EmitContext& ctx, Id value) { return ctx.OpLog2(ctx.F32[1], value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 246d7c44..18519ce3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -172,6 
+172,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value); Id EmitFPSin(EmitContext& ctx, Id value); Id EmitFPCos(EmitContext& ctx, Id value); Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp); Id EmitFPLog2(EmitContext& ctx, Id value); Id EmitFPRecip32(EmitContext& ctx, Id value); Id EmitFPRecip64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 407ee399..f28d99be 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -345,6 +345,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_BFREV_B32: translator.V_BFREV_B32(inst); break; + case Opcode::V_LDEXP_F32: + translator.V_LDEXP_F32(inst); + break; case Opcode::V_FRACT_F32: translator.V_FRACT_F32(inst); break; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 1145de59..b88cee3d 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -133,6 +133,7 @@ public: void V_NOT_B32(const GcnInst& inst); void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst); void V_BFREV_B32(const GcnInst& inst); + void V_LDEXP_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 72b2d76a..d1b63200 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -502,4 +502,10 @@ void Translator::V_BFREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(src0)); } +void Translator::V_LDEXP_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + 
const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.FPLdexp(src0, src1)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 09bb3580..8e355723 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -603,6 +603,10 @@ F32 IREmitter::FPExp2(const F32& value) { return Inst(Opcode::FPExp2, value); } +F32 IREmitter::FPLdexp(const F32& value, const U32& exp) { + return Inst(Opcode::FPLdexp, value, exp); +} + F32 IREmitter::FPLog2(const F32& value) { return Inst(Opcode::FPLog2, value); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index cf74afc0..b49c7d72 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -120,6 +120,7 @@ public: [[nodiscard]] F32 FPSin(const F32& value); [[nodiscard]] F32 FPExp2(const F32& value); [[nodiscard]] F32 FPLog2(const F32& value); + [[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp); [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index a9b895d2..7c4e15f1 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -148,6 +148,7 @@ OPCODE(FPRecipSqrt64, F64, F64, OPCODE(FPSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) OPCODE(FPExp2, F32, F32, ) +OPCODE(FPLdexp, F32, F32, U32, ) OPCODE(FPCos, F32, F32, ) OPCODE(FPLog2, F32, F32, ) OPCODE(FPSaturate32, F32, F32, ) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index f97f2d6c..2c3590fe 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanaddr_hi); break; } + case 
PM4ItOpcode::IndexBufferSize: { + const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header); + regs.num_indices = index_size->num_indices; + break; + } case PM4ItOpcode::EventWrite: { // const auto* event = reinterpret_cast(header); break; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 5b49157a..52d646e3 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase { u32 addr_hi; }; +struct PM4CmdDrawIndexBufferSize { + PM4Type3Header header; + u32 num_indices; +}; + struct PM4CmdIndirectBuffer { PM4Type3Header header; u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index abcbe8d4..1e06881b 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -392,6 +392,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Float) { return vk::Format::eR16G16Sfloat; } + if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && + num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eR16G16B16A16Snorm; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e0b53322..8c1170ca 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -203,6 +203,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { DumpShader(code, hash, stage, "bin"); } + if (hash == 0xcafe3773 || hash == 0xc6602df2) { + return nullptr; + } + block_pool.ReleaseContents(); inst_pool.ReleaseContents(); diff --git a/src/video_core/texture_cache/image.cpp 
b/src/video_core/texture_cache/image.cpp index 427f7e4b..7396719f 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -321,12 +321,13 @@ void Image::Upload(vk::Buffer buffer, u64 offset) { Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); // Copy to the image. + const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil ? vk::ImageAspectFlagBits::eDepth : aspect_mask; const vk::BufferImageCopy image_copy = { .bufferOffset = offset, .bufferRowLength = info.pitch, .bufferImageHeight = info.size.height, .imageSubresource{ - .aspectMask = aspect_mask, + .aspectMask = aspect, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1, diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 02811735..8cd6f893 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -134,13 +134,13 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r image_id = slot_images.insert(instance, scheduler, info, cpu_address); RegisterImage(image_id); } else { - image_id = image_ids.size() > 1 ? 
image_ids[1] : image_ids[0]; + image_id = image_ids[0]; } RegisterMeta(info, image_id); Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified)) { + if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { RefreshImage(image); TrackImage(image, image_id); } @@ -193,7 +193,7 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; - const ImageId image_id = FindImage(info, buffer.Address(), false); + const ImageId image_id = FindImage(info, buffer.Address()); Image& image = slot_images[image_id]; image.flags &= ~ImageFlagBits::CpuModified;