diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp
index c7677252..7700a3a1 100644
--- a/src/core/libraries/videoout/driver.cpp
+++ b/src/core/libraries/videoout/driver.cpp
@@ -209,6 +209,8 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
 
 bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
                                 bool is_eop /*= false*/) {
+    std::scoped_lock lock{mutex};
+
     Vulkan::Frame* frame;
     if (index == -1) {
         frame = renderer->PrepareBlankFrame();
@@ -218,8 +220,6 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
         frame = renderer->PrepareFrame(group, buffer.address_left);
     }
 
-    std::scoped_lock lock{mutex};
-
     if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) {
         LOG_ERROR(Lib_VideoOut, "Flip queue is full");
         return false;
diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp
index 0c40f1c4..7a0d237c 100644
--- a/src/core/libraries/videoout/video_out.cpp
+++ b/src/core/libraries/videoout/video_out.cpp
@@ -242,7 +242,7 @@ void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) {
 s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) {
     auto* port = driver->GetPort(handle);
     if (!port) {
-        return 0x8029000b;
+        return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
     }
 
     Platform::IrqC::Instance()->RegisterOnce(
diff --git a/src/emulator.cpp b/src/emulator.cpp
index b38df0c3..fda7270d 100644
--- a/src/emulator.cpp
+++ b/src/emulator.cpp
@@ -126,7 +126,7 @@ void Emulator::Run(const std::filesystem::path& file) {
     std::jthread([this](std::stop_token stop_token) { linker->Execute(); });
 
     // Begin main window loop until the application exits
-    static constexpr std::chrono::microseconds FlipPeriod{10};
+    static constexpr std::chrono::milliseconds FlipPeriod{16};
 
     while (window.isOpen()) {
         window.waitEvent();
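The FlipPeriod change above retimes the main loop from a 10 µs poll to a 16 ms cadence, i.e. roughly one flip per 60 Hz refresh. A minimal sketch of what such a pacer amounts to (the loop body and names here are illustrative, not the emulator's actual code):

    #include <chrono>
    #include <thread>

    // Illustrative 60 Hz pacer: a 16 ms period services one flip per
    // display refresh instead of busy-polling every few microseconds.
    void FlipLoop(const bool& running) {
        using clock = std::chrono::steady_clock;
        constexpr std::chrono::milliseconds FlipPeriod{16};
        auto next = clock::now() + FlipPeriod;
        while (running) {
            // Flip(...) would be serviced here.
            std::this_thread::sleep_until(next);
            next += FlipPeriod;
        }
    }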
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 7933ecd7..e1931a9c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -200,6 +200,8 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
         ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
     }
     ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
+    ctx.AddCapability(spv::Capability::ImageGatherExtended);
+    ctx.AddCapability(spv::Capability::ImageQuery);
     // if (program.info.stores_frag_depth) {
     //     ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
     // }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 6ae1ce09..a5268211 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -7,7 +7,7 @@
 namespace Shader::Backend::SPIRV {
 
 void EmitBitCastU16F16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
@@ -15,11 +15,11 @@ Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
 }
 
 void EmitBitCastU64F64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 void EmitBitCastF16U16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
@@ -27,7 +27,7 @@ Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
 }
 
 void EmitBitCastF64U64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 Id EmitPackUint2x32(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
index c7868006..74e736cf 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -115,27 +115,27 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index
 }
 
 void EmitCompositeConstructF64x2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 void EmitCompositeConstructF64x3(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 void EmitCompositeConstructF64x4(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 void EmitCompositeExtractF64x2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 void EmitCompositeExtractF64x3(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 void EmitCompositeExtractF64x4(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 9793db13..34ab5e5f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -195,10 +195,36 @@ void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
 }
 
 void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    const auto info = inst->Flags<IR::BufferInstInfo>();
+    const auto& buffer = ctx.buffers[handle];
+    if (info.index_enable && info.offset_enable) {
+        UNREACHABLE();
+    } else if (info.index_enable) {
+        for (u32 i = 0; i < 3; i++) {
+            const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
+            const Id ptr{
+                ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
+            ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
+        }
+        return;
+    }
     UNREACHABLE();
 }
 
 void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    const auto info = inst->Flags<IR::BufferInstInfo>();
+    const auto& buffer = ctx.buffers[handle];
+    if (info.index_enable && info.offset_enable) {
+        UNREACHABLE();
+    } else if (info.index_enable) {
+        for (u32 i = 0; i < 4; i++) {
+            const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
+            const Id ptr{
+                ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
+            ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
+        }
+        return;
+    }
     UNREACHABLE();
 }
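EmitStoreBufferF32x3/x4 scalarize the vector store because the buffer is declared in SPIR-V as a struct wrapping a runtime array of 32-bit scalars, so each lane needs its own access chain. A plain-C++ sketch of the same idea, with `buffer` standing in for the SSBO's float array:

    #include <cstddef>

    // One access-chain + store per lane: data[addr + i] = value[i].
    void StoreVecLanes(float* buffer, std::size_t addr, const float* value, int lanes) {
        for (int i = 0; i < lanes; ++i) {
            buffer[addr + i] = value[i];
        }
    }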
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index b04a5fd6..13db664c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -1,18 +1,34 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <boost/container/static_vector.hpp>
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 
 namespace Shader::Backend::SPIRV {
 
+struct ImageOperands {
+    void Add(spv::ImageOperandsMask new_mask, Id value) {
+        mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) |
+                                                   static_cast<u32>(new_mask));
+        operands.push_back(value);
+    }
+
+    spv::ImageOperandsMask mask{};
+    boost::container::static_vector<Id, 4> operands;
+};
+
 Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
                               Id offset) {
     const auto& texture = ctx.images[handle & 0xFFFF];
     const Id image = ctx.OpLoad(texture.image_type, texture.id);
     const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
     const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
-    return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
+    ImageOperands operands;
+    if (Sirit::ValidId(offset)) {
+        operands.Add(spv::ImageOperandsMask::Offset, offset);
+    }
+    return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
+                                        operands.operands);
 }
 
 Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
@@ -25,9 +41,13 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
                                         spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
 }
 
-Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
                                   Id dref, Id bias_lc, const IR::Value& offset) {
-    throw NotImplementedException("SPIR-V Instruction");
+    const auto& texture = ctx.images[handle & 0xFFFF];
+    const Id image = ctx.OpLoad(texture.image_type, texture.id);
+    const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
+    const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
+    return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref);
 }
 
 Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
@@ -42,12 +62,16 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
 
 Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                    const IR::Value& offset, const IR::Value& offset2) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
-Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
                        const IR::Value& offset, const IR::Value& offset2, Id dref) {
-    throw NotImplementedException("SPIR-V Instruction");
+    const auto& texture = ctx.images[handle & 0xFFFF];
+    const Id image = ctx.OpLoad(texture.image_type, texture.id);
+    const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
+    const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
+    return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref);
 }
 
 Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
@@ -83,21 +107,21 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod) {
     case AmdGpu::ImageType::Color3D:
         return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
     default:
-        throw NotImplementedException("SPIR-V Instruction");
+        UNREACHABLE_MSG("SPIR-V Instruction");
    }
 }
 
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                      Id derivatives, const IR::Value& offset, Id lod_clamp) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
-    throw NotImplementedException("SPIR-V Instruction");
+    UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
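A note on ImageOperands above: SPIR-V passes optional image operands as a bitmask followed by the operand ids, and the ids must appear in ascending order of their mask bits (Bias is bit 0, Offset is bit 4). The accumulator keeps the mask and the operand list in sync so callers can add operands conditionally. Illustrative usage reusing the names from this hunk; the Bias case is hypothetical, since the hunk only wires up Offset:

    ImageOperands operands;
    if (Sirit::ValidId(bias)) {
        operands.Add(spv::ImageOperandsMask::Bias, bias);     // bit 0: must come first
    }
    if (Sirit::ValidId(offset)) {
        operands.Add(spv::ImageOperandsMask::Offset, offset); // bit 4: after Bias
    }
    return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
                                        operands.operands);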
NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } } Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivatives, const IR::Value& offset, Id lod_clamp) { - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 2774bc2c..c64d9a34 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -338,13 +338,13 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id offset); -Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id bias_lc, const IR::Value& offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id bias_lc, Id offset); Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, Id ms); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 003857fb..49857e28 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -11,7 +11,7 @@ Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { } Id EmitSelectU8(EmitContext&, Id, Id, Id) { - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index ab86cae1..9c013dfe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -11,11 +11,11 @@ Id EmitUndefU1(EmitContext& ctx) { } Id EmitUndefU8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } Id EmitUndefU16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } Id EmitUndefU32(EmitContext& ctx) { @@ -23,7 +23,7 @@ Id EmitUndefU32(EmitContext& ctx) { } Id EmitUndefU64(EmitContext&) { - throw 
NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 0844adbb..0f7f107f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -118,6 +118,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { switch (fmt) { case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Snorm: return ctx.F32[4]; case AmdGpu::NumberFormat::Sint: return ctx.S32[4]; @@ -137,6 +138,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f switch (fmt) { case AmdGpu::NumberFormat::Float: case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Snorm: return {id, input_f32, F32[1], 4}; case AmdGpu::NumberFormat::Uint: return {id, input_u32, U32[1], 4}; @@ -253,6 +255,7 @@ void EmitContext::DefineOutputs(const Info& info) { } void EmitContext::DefineBuffers(const Info& info) { + boost::container::small_vector type_ids; for (u32 i = 0; const auto& buffer : info.buffers) { const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; @@ -260,13 +263,15 @@ void EmitContext::DefineBuffers(const Info& info) { const u32 num_elements = stride * buffer.num_records; const Id record_array_type{TypeArray(data_type, ConstU32(num_elements))}; const Id struct_type{TypeStruct(record_array_type)}; - Decorate(record_array_type, spv::Decoration::ArrayStride, 4); - - const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); - Name(struct_type, name); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { + Decorate(record_array_type, spv::Decoration::ArrayStride, 4); + const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); + Name(struct_type, name); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + } + type_ids.push_back(record_array_type); const auto storage_class = buffer.is_storage ? 
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index b95dd06d..1c8fd30b 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -329,12 +329,16 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
         break;
     case Opcode::IMAGE_SAMPLE_LZ_O:
     case Opcode::IMAGE_SAMPLE_O:
+    case Opcode::IMAGE_SAMPLE_C:
     case Opcode::IMAGE_SAMPLE_C_LZ:
     case Opcode::IMAGE_SAMPLE_LZ:
     case Opcode::IMAGE_SAMPLE:
     case Opcode::IMAGE_SAMPLE_L:
         translator.IMAGE_SAMPLE(inst);
         break;
+    case Opcode::IMAGE_GATHER4_C:
+        translator.IMAGE_GATHER(inst);
+        break;
     case Opcode::IMAGE_STORE:
         translator.IMAGE_STORE(inst);
         break;
@@ -450,16 +454,22 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
         translator.BUFFER_LOAD_FORMAT(1, false, inst);
         break;
     case Opcode::BUFFER_LOAD_FORMAT_XYZ:
+    case Opcode::BUFFER_LOAD_DWORDX3:
         translator.BUFFER_LOAD_FORMAT(3, false, inst);
         break;
     case Opcode::BUFFER_LOAD_FORMAT_XYZW:
+    case Opcode::BUFFER_LOAD_DWORDX4:
         translator.BUFFER_LOAD_FORMAT(4, false, inst);
         break;
     case Opcode::BUFFER_STORE_FORMAT_X:
     case Opcode::BUFFER_STORE_DWORD:
         translator.BUFFER_STORE_FORMAT(1, false, inst);
         break;
+    case Opcode::BUFFER_STORE_DWORDX3:
+        translator.BUFFER_STORE_FORMAT(3, false, inst);
+        break;
     case Opcode::BUFFER_STORE_FORMAT_XYZW:
+    case Opcode::BUFFER_STORE_DWORDX4:
         translator.BUFFER_STORE_FORMAT(4, false, inst);
         break;
     case Opcode::V_MAX_F32:
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 8ae77ec6..ef5ff8b7 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -146,6 +146,7 @@ public:
     // MIMG
     void IMAGE_GET_RESINFO(const GcnInst& inst);
     void IMAGE_SAMPLE(const GcnInst& inst);
+    void IMAGE_GATHER(const GcnInst& inst);
     void IMAGE_STORE(const GcnInst& inst);
     void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
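For the IMAGE_GATHER translation that follows, the address operands arrive in a fixed order dictated by the GCN encoding: every enabled modifier consumes VGPRs before the texture coordinates themselves. A sketch of that order, per the Sea Islands ISA section cited in the code (slot sizes here are recalled from that table and should be treated as an assumption):

    // Address VGPR consumption order for a sampled MIMG instruction:
    enum class MimgAddrSlot {
        Offset,      // 1 VGPR: packed 6-bit signed x/y/z offsets (Offset modifier)
        LodBias,     // 1 VGPR: float bias (LodBias modifier)
        Dref,        // 1 VGPR: float reference for depth compare (Pcf modifier)
        Derivatives, // 2/4/6 VGPRs: explicit gradients (Derivative modifier)
        Coords,      // up to 4 VGPRs: the coordinate body
        LodClamp,    // 1 VGPR: explicit lod or clamp (Lod/LodClamp modifier)
    };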
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index fc2b453d..71ca7c2e 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -76,6 +76,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
     info.has_bias.Assign(flags.test(MimgModifier::LodBias));
     info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
     info.force_level0.Assign(flags.test(MimgModifier::Level0));
+    info.has_offset.Assign(flags.test(MimgModifier::Offset));
     info.explicit_lod.Assign(explicit_lod);
 
     // Issue IR instruction, leaving unknown fields blank to patch later.
@@ -108,6 +109,74 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
     }
 }
 
+void Translator::IMAGE_GATHER(const GcnInst& inst) {
+    const auto& mimg = inst.control.mimg;
+    if (mimg.da) {
+        LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
+    }
+
+    IR::VectorReg addr_reg{inst.src[0].code};
+    IR::VectorReg dest_reg{inst.dst[0].code};
+    const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
+    const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
+    const auto flags = MimgModifierFlags(mimg.mod);
+
+    // Load first dword of T# and S#. We will use them as the handle that will guide resource
+    // tracking pass where to read the sharps. This will later also get patched to the SPIRV
+    // texture binding index.
+    const IR::Value handle =
+        ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
+
+    // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
+    // Set Architecture
+    const IR::Value offset =
+        flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{};
+    const IR::F32 bias =
+        flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
+    const IR::F32 dref =
+        flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
+
+    // Derivatives are tricky because their number depends on the texture type which is located in
+    // T#. We don't have access to T# though until resource tracking pass. For now assume no
+    // derivatives are present, otherwise we don't know where coordinates are placed in the address
+    // stream.
+    ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
+
+    // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
+    // Since these are at most 4 dwords, we load them into a single uvec4 and place them
+    // in coords field of the instruction. Then the resource tracking pass will patch the
+    // IR instruction to fill in lod_clamp field.
+    const IR::Value body = ir.CompositeConstruct(
+        ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
+        ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
+
+    const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
+
+    IR::TextureInstInfo info{};
+    info.is_depth.Assign(flags.test(MimgModifier::Pcf));
+    info.has_bias.Assign(flags.test(MimgModifier::LodBias));
+    info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
+    info.force_level0.Assign(flags.test(MimgModifier::Level0));
+    info.explicit_lod.Assign(explicit_lod);
+
+    // Issue IR instruction, leaving unknown fields blank to patch later.
+    const IR::Value texel = [&]() -> IR::Value {
+        const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
+        if (!flags.test(MimgModifier::Pcf)) {
+            return ir.ImageGather(handle, body, offset, {}, info);
+        }
+        return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
+    }();
+
+    for (u32 i = 0; i < 4; i++) {
+        if (((mimg.dmask >> i) & 1) == 0) {
+            continue;
+        }
+        const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
+        ir.SetVectorReg(dest_reg++, value);
+    }
+}
+
 void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
     const auto& mimg = inst.control.mimg;
     IR::VectorReg addr_reg{inst.src[0].code};
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index dcdb2638..21f168da 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -198,6 +198,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
 
 void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                             Descriptors& descriptors) {
+    static constexpr size_t MaxUboSize = 65536;
     IR::Inst* producer = inst.Arg(0).InstRecursive();
     const auto sharp = TrackSharp(producer);
     const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
@@ -207,7 +208,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
         .stride = buffer.GetStride(),
         .num_records = u32(buffer.num_records),
         .used_types = BufferDataType(inst),
-        .is_storage = IsBufferStore(inst),
+        .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
     });
     const auto inst_info = inst.Flags<IR::BufferInstInfo>();
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
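MaxUboSize encodes a practical Vulkan limit: the spec only guarantees a maxUniformBufferRange of 16384 bytes, and 65536 is the common ceiling on desktop hardware, so any V# larger than 64 KiB is demoted to a storage buffer binding, whose range is far larger. The classification rule as a standalone sketch, assuming the 64 KiB ceiling:

    #include <cstdint>

    // A buffer becomes an SSBO binding if the shader writes it, or if it is
    // too large to bind portably as a UBO.
    bool UseStorageBinding(bool is_written, std::uint64_t size_bytes) {
        constexpr std::uint64_t max_ubo_size = 65536;
        return is_written || size_bytes > max_ubo_size;
    }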
@@ -252,25 +253,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
 }
 
 void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
-    std::deque insts{&inst};
-    const auto& pred = [](auto opcode) -> bool {
-        return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
-                opcode == IR::Opcode::ReadConst ||               // IMAGE_LOAD (image only)
-                opcode == IR::Opcode::GetUserData);
-    };
-
-    IR::Inst* producer{};
-    while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) {
-        for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) {
-            const auto arg = producer->Arg(arg_idx);
-            if (arg.TryInstRecursive()) {
-                insts.push_back(arg.InstRecursive());
-            }
-        }
-        insts.pop_front();
+    IR::Inst* producer = inst.Arg(0).InstRecursive();
+    while (producer->GetOpcode() == IR::Opcode::Phi) {
+        producer = producer->Arg(0).InstRecursive();
     }
-
-    ASSERT(pred(producer->GetOpcode()));
+    ASSERT(producer->GetOpcode() ==
+               IR::Opcode::CompositeConstructU32x2 ||        // IMAGE_SAMPLE (image+sampler)
+           producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
+           producer->GetOpcode() == IR::Opcode::GetUserData);
     const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
         if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
             return std::make_pair(producer->Arg(0).InstRecursive(),
@@ -335,6 +325,22 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     }();
     inst.SetArg(1, coords);
 
+    if (inst_info.has_offset) {
+        // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
+        const u32 arg_pos = inst_info.is_depth ? 4 : 3;
+        const IR::Value arg = inst.Arg(arg_pos);
+        ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
+        const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 26) >> 26); };
+        union {
+            u32 raw;
+            BitField<0, 6, u32> x;
+            BitField<8, 6, u32> y;
+            BitField<16, 6, u32> z;
+        } offset{arg.U32()};
+        const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y));
+        inst.SetArg(arg_pos, value);
+    }
+
     if (inst_info.has_lod_clamp) {
         // Final argument contains lod_clamp
         const u32 arg_pos = inst_info.is_depth ? 5 : 4;
diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h
index 1b268611..f3000528 100644
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@@ -38,6 +38,7 @@ union TextureInstInfo {
     BitField<2, 1, u32> has_lod_clamp;
     BitField<3, 1, u32> force_level0;
     BitField<4, 1, u32> explicit_lod;
+    BitField<5, 1, u32> has_offset;
 };
 
 union BufferInstInfo {
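The offset repacking in PatchImageInstruction relies on 6-bit sign extension: each packed field is zero-extended when extracted, so shifting the field's sign bit up to bit 31 and arithmetic-shifting back restores negative values (a shift of 24, as earlier drafts had, would sign-extend 8 bits and leave values like 0x3F positive). The fixed lambda as a standalone function:

    #include <cstdint>

    // Sign-extend a 6-bit two's-complement field held in the low bits of a u32.
    std::int32_t SignExtend6(std::uint32_t value) {
        return static_cast<std::int32_t>(value << 26) >> 26;
    }
    // SignExtend6(0x3F) == -1, SignExtend6(0x20) == -32, SignExtend6(0x1F) == 31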
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 196fad01..21f3602f 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -52,7 +52,7 @@ struct BufferResource {
     auto operator<=>(const BufferResource&) const = default;
 };
 
-using BufferResourceList = boost::container::static_vector;
+using BufferResourceList = boost::container::static_vector;
 
 struct ImageResource {
     u32 sgpr_base;
@@ -62,13 +62,13 @@ struct ImageResource {
     bool is_storage;
     bool is_depth;
 };
-using ImageResourceList = boost::container::static_vector;
+using ImageResourceList = boost::container::static_vector;
 
 struct SamplerResource {
     u32 sgpr_base;
     u32 dword_offset;
 };
-using SamplerResourceList = boost::container::static_vector;
+using SamplerResourceList = boost::container::static_vector;
 
 struct Info {
     struct VsInput {
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 5f0c923e..f97f2d6c 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -187,6 +187,13 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
+        case PM4ItOpcode::SetConfigReg: {
+            const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
+            const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
+            const auto* payload = reinterpret_cast<const u32*>(header + 2);
+            std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
+            break;
+        }
         case PM4ItOpcode::SetContextReg: {
             const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
             const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h
index bcc1187a..4767e59f 100644
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@@ -43,6 +43,7 @@ struct Liverpool {
     static constexpr u32 NumShaderUserData = 16;
     static constexpr u32 UconfigRegWordOffset = 0xC000;
     static constexpr u32 ContextRegWordOffset = 0xA000;
+    static constexpr u32 ConfigRegWordOffset = 0x2000;
     static constexpr u32 ShRegWordOffset = 0x2C00;
     static constexpr u32 NumRegs = 0xD000;
@@ -789,6 +790,7 @@ struct Liverpool {
         u32 raw;
         BitField<0, 1, u32> depth_clear_enable;
         BitField<1, 1, u32> stencil_clear_enable;
+        BitField<6, 1, u32> depth_compress_disable;
     };
 
     union AaConfig {
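The new SetConfigReg case follows the usual PM4 set-register packet shape: the dword after the header holds the register offset, and the remaining payload dwords are written to consecutive registers starting at a per-block base (0x2000 for config registers here). A hedged sketch of that layout; the parameter names and the meaning of `count` mirror the surrounding parser rather than any official header:

    #include <cstdint>
    #include <cstring>

    void WriteRegBurst(std::uint32_t* reg_array, std::uint32_t block_base,
                       const std::uint32_t* packet, std::uint32_t count) {
        const std::uint32_t reg_offset = packet[1]; // dword after the PM4 header
        const std::uint32_t* payload = packet + 2;  // data dwords follow
        std::memcpy(&reg_array[block_base + reg_offset], payload,
                    (count - 1) * sizeof(std::uint32_t));
    }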
diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
index b0b64570..224666d5 100644
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -366,6 +366,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
     if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eR8G8Unorm;
     }
+    if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
+        return vk::Format::eBc7UnormBlock;
+    }
     if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eBc2UnormBlock;
     }
@@ -376,9 +379,15 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
     if (data_format == AmdGpu::DataFormat::Format2_10_10_10 &&
         num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eA2R10G10B10UnormPack32;
     }
+    if (data_format == AmdGpu::DataFormat::Format2_10_10_10 &&
+        num_format == AmdGpu::NumberFormat::Snorm) {
+        return vk::Format::eA2R10G10B10SnormPack32;
+    }
     if (data_format == AmdGpu::DataFormat::Format10_11_11 &&
         num_format == AmdGpu::NumberFormat::Float) {
         return vk::Format::eB10G11R11UfloatPack32;
     }
+    if (data_format == AmdGpu::DataFormat::Format16_16 &&
+        num_format == AmdGpu::NumberFormat::Float) {
+        return vk::Format::eR16G16Sfloat;
+    }
     UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
 }
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index fb6bc248..a2b8892c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -381,6 +381,7 @@ Frame* RendererVulkan::GetRenderFrame() {
     {
         std::unique_lock lock{free_mutex};
         free_cv.wait(lock, [this] { return !free_queue.empty(); });
+        LOG_INFO(Render_Vulkan, "Got render frame, remaining {}", free_queue.size() - 1);
 
         // Take the frame from the queue
         frame = free_queue.front();
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 0de0f425..d7e68712 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -85,7 +85,7 @@ ComputePipeline::~ComputePipeline() = default;
 
 bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                                     VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
-    boost::container::static_vector buffer_infos;
+    boost::container::static_vector buffer_infos;
     boost::container::static_vector image_infos;
     boost::container::small_vector set_writes;
     u32 binding{};
@@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
         // need its full emulation anyways. For cases of metadata read a warning will be logged.
         if (buffer.is_storage) {
             if (texture_cache.TouchMeta(address, true)) {
-                LOG_TRACE(Render_Vulkan, "Metadata update skipped");
+                LOG_WARNING(Render_Vulkan, "Metadata update skipped");
                 return false;
             }
         } else {
@@ -127,7 +127,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
 
     for (const auto& image : info.images) {
         const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
-        const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
+        const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth);
         image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 0fd7e5e5..d3a7df05 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -187,7 +187,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
     const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
         .colorAttachmentCount = num_color_formats,
         .pColorAttachmentFormats = key.color_formats.data(),
-        .depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
+        .depthAttachmentFormat = key.depth_format,
         .stencilAttachmentFormat = vk::Format::eUndefined,
     };
 
@@ -320,7 +320,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
 
     // Bind resource buffers and textures.
     boost::container::static_vector buffer_infos;
-    boost::container::static_vector image_infos;
+    boost::container::static_vector image_infos;
     boost::container::small_vector set_writes;
     u32 binding{};
 
@@ -350,9 +350,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
 
     for (const auto& image : stage.images) {
         const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
-        const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
+        const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage, image.is_depth);
         image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
-                                 vk::ImageLayout::eShaderReadOnlyOptimal);
+                                 (image.is_storage || image.is_depth)
+                                     ? vk::ImageLayout::eGeneral
+                                     : vk::ImageLayout::eShaderReadOnlyOptimal);
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
             .dstBinding = binding++,
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index ecc27314..b6cffc1a 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -205,6 +205,7 @@ bool Instance::CreateDevice() {
             .logicOp = features.logicOp,
             .samplerAnisotropy = features.samplerAnisotropy,
             .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
+            .shaderImageGatherExtended = true,
             .shaderStorageImageMultisample = true,
             .shaderClipDistance = features.shaderClipDistance,
         },
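Both pipelines now pick the descriptor layout from how the image is used: storage and depth images are kept in eGeneral (storage needs shader writes, and depth images double as attachments, so eGeneral avoids ping-ponging layouts between draw and sample), while plain sampled images stay read-only optimal. The decision in isolation:

    #include <vulkan/vulkan.hpp>

    vk::ImageLayout DescriptorImageLayout(bool is_storage, bool is_depth) {
        return (is_storage || is_depth) ? vk::ImageLayout::eGeneral
                                        : vk::ImageLayout::eShaderReadOnlyOptimal;
    }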
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7858758d..8fbe7ac2 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -117,8 +117,8 @@ void PipelineCache::RefreshGraphicsKey() {
     key.num_samples = regs.aa_config.NumSamples();
 
     const auto& db = regs.depth_buffer;
+    key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
     if (key.depth.depth_enable) {
-        key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
         key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
     }
 
@@ -206,6 +206,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         block_pool.ReleaseContents();
         inst_pool.ReleaseContents();
 
+        if (hash == 0xa34c48f8) {
+            printf("bad\n");
+        }
+
         // Recompile shader to IR.
         try {
             LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
@@ -214,12 +218,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
 
             // Compile IR to SPIR-V
             auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
-            stages[i] = CompileSPV(spv_code, instance.GetDevice());
-            infos[i] = &programs[i].info;
-
             if (Config::dumpShaders()) {
                 DumpShader(spv_code, hash, stage, "spv");
             }
+            stages[i] = CompileSPV(spv_code, instance.GetDevice());
+            infos[i] = &programs[i].info;
         } catch (const Shader::Exception& e) {
             UNREACHABLE_MSG("{}", e.what());
         }
@@ -246,22 +249,25 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
     inst_pool.ReleaseContents();
 
     // Recompile shader to IR.
-    LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
-    const Shader::Info info =
-        MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
-    auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
+    try {
+        LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
+        const Shader::Info info =
+            MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
+        auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
 
-    // Compile IR to SPIR-V
-    u32 binding{};
-    const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
-    const auto module = CompileSPV(spv_code, instance.GetDevice());
-
-    if (Config::dumpShaders()) {
-        DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
+        // Compile IR to SPIR-V
+        u32 binding{};
+        const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, binding);
+        if (Config::dumpShaders()) {
+            DumpShader(spv_code, compute_key, Shader::Stage::Compute, "spv");
+        }
+        const auto module = CompileSPV(spv_code, instance.GetDevice());
+        return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
+                                                 &program.info, module);
+    } catch (const Shader::Exception& e) {
+        UNREACHABLE_MSG("{}", e.what());
+        return nullptr;
     }
-
-    return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, &program.info,
-                                             module);
 }
 
 void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7086b23e..82a938ae 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
     : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
       liverpool{liverpool_}, memory{Core::Memory::Instance()},
       pipeline_cache{instance, scheduler, liverpool},
-      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} {
+      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} {
     if (!Config::nullGpu()) {
         liverpool->BindRasterizer(this);
     }
@@ -46,71 +46,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
 
     pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
 
-    boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
-        color_attachments{};
-    for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
-        const auto& col_buf = regs.color_buffers[col_buf_id];
-        if (!col_buf) {
-            continue;
-        }
-
-        const auto& hint = liverpool->last_cb_extent[col_buf_id];
-        const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
-
-        const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
-        color_attachments.push_back({
-            .imageView = *image_view.image_view,
-            .imageLayout = vk::ImageLayout::eGeneral,
-            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
-            .storeOp = vk::AttachmentStoreOp::eStore,
-            .clearValue =
-                is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
-        });
-        texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
-    }
-
-    vk::RenderingAttachmentInfo depth_attachment{};
-    u32 num_depth_attachments{};
-    if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) {
-        const auto htile_address = regs.depth_htile_data_base.GetAddress();
-        const bool is_clear = regs.depth_render_control.depth_clear_enable ||
-                              texture_cache.IsMetaCleared(htile_address);
-        const auto& image_view =
-            texture_cache.DepthTarget(regs.depth_buffer, htile_address, liverpool->last_db_extent);
-        depth_attachment = {
-            .imageView = *image_view.image_view,
-            .imageLayout = vk::ImageLayout::eGeneral,
-            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
-            .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
-            .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
-                                                          .stencil = regs.stencil_clear}},
-        };
-        texture_cache.TouchMeta(htile_address, false);
-        num_depth_attachments++;
-    }
-
-    // TODO: Don't restart renderpass every draw
-    const auto& scissor = regs.screen_scissor;
-    vk::RenderingInfo rendering_info = {
-        .renderArea =
-            {
-                .offset = {scissor.top_left_x, scissor.top_left_y},
-                .extent = {scissor.GetWidth(), scissor.GetHeight()},
-            },
-        .layerCount = 1,
-        .colorAttachmentCount = static_cast<u32>(color_attachments.size()),
-        .pColorAttachments = color_attachments.data(),
-        .pDepthAttachment = num_depth_attachments ? &depth_attachment : nullptr,
-    };
-    auto& area = rendering_info.renderArea.extent;
-    if (area.width == 2048) {
-        area.width = 1920;
-        area.height = 1080;
-    }
-
+    BeginRendering();
     UpdateDynamicState(*pipeline);
 
-    cmdbuf.beginRendering(rendering_info);
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
     if (is_indexed) {
         cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
@@ -120,7 +58,6 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
                                   : regs.num_indices;
         cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
     }
-    cmdbuf.endRendering();
 }
 
 void Rasterizer::DispatchDirect() {
@@ -138,15 +75,66 @@ void Rasterizer::DispatchDirect() {
         return;
     }
 
+    scheduler.EndRendering();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
     cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
 }
 
+void Rasterizer::BeginRendering() {
+    const auto& regs = liverpool->regs;
+    RenderState state;
+
+    for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
+        const auto& col_buf = regs.color_buffers[col_buf_id];
+        if (!col_buf) {
+            continue;
+        }
+
+        const auto& hint = liverpool->last_cb_extent[col_buf_id];
+        const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
+        state.width = std::min(state.width, hint.width);
+        state.height = std::min(state.height, hint.height);
+
+        const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
+        state.color_attachments[state.num_color_attachments++] = {
+            .imageView = *image_view.image_view,
+            .imageLayout = vk::ImageLayout::eGeneral,
+            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
+            .storeOp = vk::AttachmentStoreOp::eStore,
+            .clearValue =
+                is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
+        };
+        texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
+    }
+
+    if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invald &&
+        regs.depth_buffer.Address() != 0) {
+        const auto htile_address = regs.depth_htile_data_base.GetAddress();
+        const bool is_clear = regs.depth_render_control.depth_clear_enable ||
+                              texture_cache.IsMetaCleared(htile_address);
+        const auto& hint = liverpool->last_db_extent;
+        const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, htile_address, hint);
+        state.width = std::min(state.width, hint.width);
+        state.height = std::min(state.height, hint.height);
+        state.depth_attachment = {
+            .imageView = *image_view.image_view,
+            .imageLayout = vk::ImageLayout::eGeneral,
+            .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
+            .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
+            .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
+                                                          .stencil = regs.stencil_clear}},
+        };
+        texture_cache.TouchMeta(htile_address, false);
+        state.num_depth_attachments++;
+    }
+    scheduler.BeginRendering(state);
+}
+
 u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
     // Emulate QuadList primitive type with CPU made index buffer.
     const auto& regs = liverpool->regs;
     if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
-        ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
+        // ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
         is_indexed = true;
 
         // Emit indices.
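Moving attachment setup out of Draw() enables the scheduler-side optimization that follows: consecutive draws targeting the same attachments can share one vkCmdBeginRendering/vkCmdEndRendering pair instead of opening a scope per draw. The core of that caching idea, with simplified stand-in types:

    // StateKey stands in for the full RenderState comparison.
    struct StateKey {
        int attachments_hash = 0;
        bool operator==(const StateKey&) const = default;
    };

    struct ScopeCache {
        StateKey current{};
        bool is_rendering = false;

        void Begin(const StateKey& next) {
            if (is_rendering && current == next) {
                return; // same targets: keep the open scope
            }
            End();
            current = next;
            is_rendering = true;
            // vkCmdBeginRendering(...) recorded here.
        }
        void End() {
            if (!is_rendering) {
                return;
            }
            is_rendering = false;
            // vkCmdEndRendering(...) recorded here.
        }
    };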
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 2ff4c244..7bf1ab9b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -37,6 +37,8 @@ private:
     u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
     void MapMemory(VAddr addr, size_t size);
 
+    void BeginRendering();
+
     void UpdateDynamicState(const GraphicsPipeline& pipeline);
     void UpdateViewportScissorState();
     void UpdateDepthStencilState();
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 7ed311f7..51cb4690 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -18,6 +18,37 @@ Scheduler::~Scheduler() {
     std::free(profiler_scope);
 }
 
+void Scheduler::BeginRendering(const RenderState& new_state) {
+    if (is_rendering && render_state == new_state) {
+        return;
+    }
+    EndRendering();
+    is_rendering = true;
+    render_state = new_state;
+
+    const vk::RenderingInfo rendering_info = {
+        .renderArea =
+            {
+                .offset = {0, 0},
+                .extent = {render_state.width, render_state.height},
+            },
+        .layerCount = 1,
+        .colorAttachmentCount = static_cast<u32>(render_state.color_attachments.size()),
+        .pColorAttachments = render_state.color_attachments.data(),
+        .pDepthAttachment =
+            render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr,
+    };
+
+    current_cmdbuf.beginRendering(rendering_info);
+}
+
+void Scheduler::EndRendering() {
+    if (!is_rendering) {
+        return;
+    }
+    is_rendering = false;
+    current_cmdbuf.endRendering();
+}
+
 void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
     // When flushing, we only send data to the worker thread; no waiting is necessary.
     SubmitExecution(signal, wait);
@@ -55,6 +86,7 @@ void Scheduler::AllocateWorkerCommandBuffers() {
 }
 
 void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
+    std::scoped_lock lk{submit_mutex};
     const u64 signal_value = master_semaphore.NextTick();
 
     auto* profiler_ctx = instance.GetProfilerContext();
@@ -63,7 +95,7 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
         TracyVkCollect(profiler_ctx, current_cmdbuf);
     }
 
-    std::scoped_lock lk{submit_mutex};
+    EndRendering();
     master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
     master_semaphore.Refresh();
     AllocateWorkerCommandBuffers();
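BeginRendering's early-out hinges on RenderState equality, which (in the header below) is a raw memcmp. That is only sound for trivially copyable state whose every byte, padding included, is deterministic; RenderState value-initializes all of its members, so two states built the same way compare equal. The comparison in isolation:

    #include <cstring>
    #include <type_traits>

    template <typename T>
    bool BitwiseEqual(const T& a, const T& b) {
        static_assert(std::is_trivially_copyable_v<T>);
        return std::memcmp(&a, &b, sizeof(T)) == 0;
    }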
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 284c288a..7bf50622 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <condition_variable>
+#include <cstring>
 #include "common/types.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
@@ -12,6 +13,19 @@ namespace Vulkan {
 
 class Instance;
 
+struct RenderState {
+    std::array<vk::RenderingAttachmentInfo, 8> color_attachments{};
+    vk::RenderingAttachmentInfo depth_attachment{};
+    u32 num_color_attachments{};
+    u32 num_depth_attachments{};
+    u32 width = std::numeric_limits<u32>::max();
+    u32 height = std::numeric_limits<u32>::max();
+
+    bool operator==(const RenderState& other) const noexcept {
+        return std::memcmp(this, &other, sizeof(RenderState)) == 0;
+    }
+};
+
 class Scheduler {
 public:
     explicit Scheduler(const Instance& instance);
@@ -26,6 +40,12 @@ public:
     /// Waits for the given tick to trigger on the GPU.
     void Wait(u64 tick);
 
+    /// Starts a new rendering scope with provided state.
+    void BeginRendering(const RenderState& new_state);
+
+    /// Ends current rendering scope.
+    void EndRendering();
+
     /// Returns the current command buffer.
     vk::CommandBuffer CommandBuffer() const {
         return current_cmdbuf;
@@ -59,6 +79,8 @@ private:
     CommandPool command_pool;
     vk::CommandBuffer current_cmdbuf;
     std::condition_variable_any event_cv;
+    RenderState render_state;
+    bool is_rendering = false;
     tracy::VkCtxScope* profiler_scope{};
 };
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index b1c2362f..0345ea0f 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -221,6 +221,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     : instance{&instance_}, scheduler{&scheduler_}, info{info_},
       image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
       cpu_addr_end{cpu_addr + info.guest_size_bytes} {
+    ASSERT(info.pixel_format != vk::Format::eUndefined);
     vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
                                vk::ImageCreateFlagBits::eExtendedUsage};
     if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
@@ -272,7 +273,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
 }
 
-void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask) {
+void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
+                    vk::CommandBuffer cmdbuf) {
     if (dst_layout == layout && dst_mask == access_mask) {
         return;
     }
@@ -300,7 +302,12 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
                                dst_mask == vk::AccessFlagBits::eTransferWrite)
                                   ? vk::PipelineStageFlagBits::eTransfer
                                   : vk::PipelineStageFlagBits::eAllGraphics |
                                         vk::PipelineStageFlagBits::eComputeShader;
 
-    const auto cmdbuf = scheduler->CommandBuffer();
+    if (!cmdbuf) {
+        // When using external cmdbuf you are responsible for ending rp.
+        scheduler->EndRendering();
+        cmdbuf = scheduler->CommandBuffer();
+    }
     cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
                            barrier);
 
@@ -310,6 +317,7 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
 }
 
 void Image::Upload(vk::Buffer buffer, u64 offset) {
+    scheduler->EndRendering();
     Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
 
     // Copy to the image.
@@ -318,7 +326,7 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
         .bufferRowLength = info.pitch,
         .bufferImageHeight = info.size.height,
         .imageSubresource{
-            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .aspectMask = aspect_mask,
             .mipLevel = 0,
             .baseArrayLayer = 0,
             .layerCount = 1,
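Image::Upload and Image::Transit call EndRendering first because transfer commands and most barriers are not legal inside a render pass instance; any upload reached mid-frame must close the current dynamic-rendering scope before transitioning and copying. The call order, sketched with this diff's own types (the copy itself is elided):

    void UploadSketch(Vulkan::Scheduler& scheduler, VideoCore::Image& image) {
        scheduler.EndRendering(); // leave the rendering scope first
        image.Transit(vk::ImageLayout::eTransferDstOptimal,
                      vk::AccessFlagBits::eTransferWrite);
        // cmdbuf.copyBufferToImage(...) is now valid to record.
    }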
diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h
index 2067cde3..e91f1b58 100644
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@@ -132,7 +132,8 @@ struct Image {
         return image_view_ids[std::distance(image_view_infos.begin(), it)];
     }
 
-    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask);
+    void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
+                 vk::CommandBuffer cmdbuf = {});
     void Upload(vk::Buffer buffer, u64 offset);
 
     const Vulkan::Instance* instance;
diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp
index 7d6e2960..ba45c493 100644
--- a/src/video_core/texture_cache/image_view.cpp
+++ b/src/video_core/texture_cache/image_view.cpp
@@ -80,8 +80,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info,
 
     // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
     vk::Format format = info.format;
+    vk::ImageAspectFlags aspect = image.aspect_mask;
     if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth && format == vk::Format::eR32Sfloat) {
-        format = vk::Format::eD32Sfloat;
+        format = image.info.pixel_format;
+        aspect = vk::ImageAspectFlagBits::eDepth;
     }
 
     const vk::ImageViewCreateInfo image_view_ci = {
@@ -91,7 +93,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info,
         .format = format,
         .components = info.mapping,
         .subresourceRange{
-            .aspectMask = image.aspect_mask,
+            .aspectMask = aspect,
             .baseMipLevel = 0U,
             .levelCount = 1,
             .baseArrayLayer = 0,
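The image_view.cpp change generalizes the old D32 special case: when a shader samples a depth image, the T# advertises a color format (R32 Float), so the view must instead take the image's real depth format and select the depth aspect. The fixup as a pure function, assuming the same inputs the constructor has at hand:

    #include <vulkan/vulkan.hpp>

    vk::Format DepthViewFormat(vk::Format requested, vk::Format image_format,
                               vk::ImageAspectFlags image_aspect) {
        const bool is_depth =
            static_cast<bool>(image_aspect & vk::ImageAspectFlagBits::eDepth);
        return (is_depth && requested == vk::Format::eR32Sfloat) ? image_format
                                                                 : requested;
    }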
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index 21127c2b..fdfab4ea 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -116,10 +116,15 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) {
     std::unique_lock lock{m_page_table};
     boost::container::small_vector image_ids;
     ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
-        if (image.cpu_addr == cpu_address && image.info.size.width == info.size.width &&
-            image.info.IsDepthStencil() == info.IsDepthStencil()) {
-            image_ids.push_back(image_id);
+        // Address and width must match.
+        if (image.cpu_addr != cpu_address || image.info.size.width != info.size.width) {
+            return;
         }
+        if (info.IsDepthStencil() != image.info.IsDepthStencil() &&
+            info.pixel_format != vk::Format::eR32Sfloat) {
+            return;
+        }
+        image_ids.push_back(image_id);
     });
 
     ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
@@ -129,7 +134,7 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) {
         image_id = slot_images.insert(instance, scheduler, info, cpu_address);
         RegisterImage(image_id);
     } else {
-        image_id = image_ids[0];
+        image_id = image_ids.size() > 1 ? image_ids[1] : image_ids[0];
     }
 
     RegisterMeta(info, image_id);
@@ -163,11 +168,11 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& view_info) {
     return slot_image_views[view_id];
 }
 
-ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
+ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage, bool is_depth) {
     const ImageInfo info{desc};
     Image& image = FindImage(info, desc.Address());
 
-    if (is_storage) {
+    if (is_storage || is_depth) {
         image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
         image.info.usage.storage = true;
     } else {
@@ -202,7 +207,7 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
     auto& image = FindImage(info, buffer.Address(), false);
     image.flags &= ~ImageFlagBits::CpuModified;
 
-    image.Transit(vk::ImageLayout::eDepthStencilAttachmentOptimal,
+    image.Transit(vk::ImageLayout::eGeneral,
                   vk::AccessFlagBits::eDepthStencilAttachmentWrite |
                       vk::AccessFlagBits::eDepthStencilAttachmentRead);
 
@@ -261,6 +266,8 @@ void TextureCache::RefreshImage(Image& image) {
         .imageExtent = {width, height, 1},
     };
 
+    scheduler.EndRendering();
+
     const auto cmdbuf = scheduler.CommandBuffer();
     image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8778f3e8..00374697 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -52,7 +52,8 @@ public:
                                    bool refresh_on_create = true);
 
     /// Retrieves an image view with the properties of the specified image descriptor.
-    [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage);
+    [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage,
+                                           bool is_depth);
 
     /// Retrieves the render target with specified properties
     [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp
index 0b6fd0eb..3a431231 100644
--- a/src/video_core/texture_cache/tile_manager.cpp
+++ b/src/video_core/texture_cache/tile_manager.cpp
@@ -231,7 +231,7 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eTransferSrc |
 
 TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
     : instance{instance}, scheduler{scheduler},
-      staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
+      staging{instance, scheduler, StagingFlags, 128_MB, Vulkan::BufferType::Upload} {
 
     static const std::array detiler_shaders{
         HostShaders::DETILE_M8X1_COMP,
        HostShaders::DETILE_M8X2_COMP,