diff --git a/CMakeLists.txt b/CMakeLists.txt index ba180f89..357476d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,8 +117,6 @@ set(KERNEL_LIB src/core/libraries/kernel/libkernel.h src/core/libraries/kernel/memory_management.cpp src/core/libraries/kernel/memory_management.h - src/core/libraries/kernel/physical_memory.cpp - src/core/libraries/kernel/physical_memory.h src/core/libraries/kernel/thread_management.cpp src/core/libraries/kernel/thread_management.h src/core/libraries/kernel/time_management.cpp diff --git a/src/core/libraries/kernel/physical_memory.cpp b/src/core/libraries/kernel/physical_memory.cpp deleted file mode 100644 index d81bbe5f..00000000 --- a/src/core/libraries/kernel/physical_memory.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/alignment.h" -#include "core/libraries/kernel/physical_memory.h" - -namespace Libraries::Kernel { - -bool PhysicalMemory::Alloc(u64 searchStart, u64 searchEnd, u64 len, u64 alignment, u64* physAddrOut, - int memoryType) { - std::scoped_lock lock{m_mutex}; - u64 find_free_pos = 0; - - // Iterate through allocated blocked and find the next free position - for (const auto& block : m_allocatedBlocks) { - u64 n = block.start_addr + block.size; - if (n > find_free_pos) { - find_free_pos = n; - } - } - - // Align free position - find_free_pos = Common::AlignUp(find_free_pos, alignment); - - // If the new position is between searchStart - searchEnd , allocate a new block - if (find_free_pos >= searchStart && find_free_pos + len <= searchEnd) { - AllocatedBlock block{}; - block.size = len; - block.start_addr = find_free_pos; - block.memoryType = memoryType; - block.map_size = 0; - block.map_virtual_addr = 0; - block.prot = 0; - block.cpu_mode = VirtualMemory::MemoryMode::NoAccess; - - m_allocatedBlocks.push_back(block); - - *physAddrOut = find_free_pos; - return true; - } - - return false; -} - -bool PhysicalMemory::Map(u64 virtual_addr, u64 phys_addr, u64 len, int prot, - VirtualMemory::MemoryMode cpu_mode) { - std::scoped_lock lock{m_mutex}; - for (auto& b : m_allocatedBlocks) { - if (phys_addr >= b.start_addr && phys_addr < b.start_addr + b.size) { - if (b.map_virtual_addr != 0 || b.map_size != 0) { - return false; - } - - b.map_virtual_addr = virtual_addr; - b.map_size = len; - b.prot = prot; - b.cpu_mode = cpu_mode; - - return true; - } - } - - return false; -} - -} // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/physical_memory.h b/src/core/libraries/kernel/physical_memory.h deleted file mode 100644 index 27ef0666..00000000 --- a/src/core/libraries/kernel/physical_memory.h +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include "common/types.h" -#include "core/virtual_memory.h" - -namespace Libraries::Kernel { - -class PhysicalMemory { -public: - struct AllocatedBlock { - u64 start_addr; - u64 size; - int memoryType; - u64 map_virtual_addr; - u64 map_size; - int prot; - VirtualMemory::MemoryMode cpu_mode; - }; - PhysicalMemory() {} - virtual ~PhysicalMemory() {} - -public: - bool Alloc(u64 searchStart, u64 searchEnd, u64 len, u64 alignment, u64* physAddrOut, - int memoryType); - bool Map(u64 virtual_addr, u64 phys_addr, u64 len, int prot, - VirtualMemory::MemoryMode cpu_mode); - -private: - std::vector m_allocatedBlocks; - std::mutex m_mutex; -}; - -} // namespace Libraries::Kernel diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 8ecd311b..acae3b52 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -35,7 +35,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, } // Align free position - free_addr = Common::AlignUp(free_addr, alignment); + free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr; ASSERT(free_addr >= search_start && free_addr + size <= search_end); // Add the allocated region to the list and commit its pages. diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index ac8f22af..7c21e6fc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -12,13 +12,17 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - const auto info = inst->Flags(); return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords); } -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod, const IR::Value& offset) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, + Id offset) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id image = ctx.OpLoad(texture.image_type, texture.id); + const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); + const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); + return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, + spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -26,9 +30,14 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va throw NotImplementedException("SPIR-V Instruction"); } -Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod, const IR::Value& offset) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, + Id bias_lc, Id offset) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id image = ctx.OpLoad(texture.image_type, texture.id); + const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); + const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); + return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, + spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); } Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 728dd2bc..33a44935 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -334,12 +334,12 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, + Id offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id bias_lc, const IR::Value& offset); -Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, + Id bias_lc, Id offset); Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index bd289ffb..893df1e9 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -149,7 +149,8 @@ void CFG::LinkBlocks() { block.end_class = EndClass::Branch; } else if (end_inst.opcode == Opcode::S_ENDPGM) { const auto& prev_inst = inst_list[block.end_index - 1]; - if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) { + if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0 && + prev_inst.control.exp.target != 9) { block.end_class = EndClass::Kill; } else { block.end_class = EndClass::Exit; diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp index 14028b76..2cf5c5b2 100644 --- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -18,10 +18,18 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { } void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { + static constexpr u32 SQ_SRC_LITERAL = 0xFF; const auto& smrd = inst.control.smrd; const IR::ScalarReg sbase{inst.src[0].code * 2}; - const IR::U32 dword_offset = - smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset)); + const IR::U32 dword_offset = [&] -> IR::U32 { + if (smrd.imm) { + return ir.Imm32(smrd.offset); + } + if (smrd.offset == SQ_SRC_LITERAL) { + return ir.Imm32(inst.src[1].code); + } + return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2)); + }(); const IR::Value vsharp = ir.GetScalarReg(sbase); IR::ScalarReg dst_reg{inst.dst[0].code}; for (u32 i = 0; i < num_dwords; i++) { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 2abc87a6..510b4b28 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -129,7 +129,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { } break; case OperandField::VccHi: - value = ir.GetVccHi(); + if (force_flt) { + value = ir.BitCast(ir.GetVccHi()); + } else { + value = ir.GetVccHi(); + } break; default: UNREACHABLE(); @@ -297,6 +301,8 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MADAK_F32: // Yes these can share the opcode translator.V_FMA_F32(inst); break; + case Opcode::IMAGE_SAMPLE_C_LZ: + case Opcode::IMAGE_SAMPLE_LZ: case Opcode::IMAGE_SAMPLE: translator.IMAGE_SAMPLE(inst); break; @@ -351,9 +357,15 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_CMP_LG_U32: translator.S_CMP(ConditionOp::LG, false, inst); break; + case Opcode::S_CMP_LT_I32: + translator.S_CMP(ConditionOp::LT, true, inst); + break; case Opcode::S_CMP_LG_I32: translator.S_CMP(ConditionOp::LG, true, inst); break; + case Opcode::S_CMP_GT_I32: + translator.S_CMP(ConditionOp::GT, true, inst); + break; case Opcode::S_CMP_EQ_I32: translator.S_CMP(ConditionOp::EQ, true, inst); break; @@ -387,6 +399,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_SIN_F32: translator.V_SIN_F32(inst); break; + case Opcode::V_COS_F32: + translator.V_COS_F32(inst); + break; case Opcode::V_LOG_F32: translator.V_LOG_F32(inst); break; @@ -522,6 +537,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_RNDNE_F32: translator.V_RNDNE_F32(inst); break; + case Opcode::V_BCNT_U32_B32: + translator.V_BCNT_U32_B32(inst); + break; case Opcode::S_NOP: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 6fd8e3f5..870cb3aa 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -104,6 +104,8 @@ public: void V_ASHRREV_I32(const GcnInst& inst); void V_MAD_U32_U24(const GcnInst& inst); void V_RNDNE_F32(const GcnInst& inst); + void V_BCNT_U32_B32(const GcnInst& inst); + void V_COS_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7484da57..0a3ec92e 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -25,8 +25,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { } void Translator::V_MUL_F32(const GcnInst& inst) { - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); + SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); } void Translator::V_CNDMASK_B32(const GcnInst& inst) { @@ -372,4 +371,15 @@ void Translator::V_RNDNE_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPRoundEven(src0)); } +void Translator::V_BCNT_U32_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1)); +} + +void Translator::V_COS_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + SetDst(inst.dst[0], ir.FPCos(src0)); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 68b4fb11..36e816fb 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -148,6 +148,9 @@ private: } // Anonymous namespace SharpLocation TrackSharp(const IR::Inst* inst) { + while (inst->GetOpcode() == IR::Opcode::Phi) { + inst = inst->Arg(0).InstRecursive(); + } if (inst->GetOpcode() == IR::Opcode::GetUserData) { return SharpLocation{ .sgpr_base = u32(IR::ScalarReg::Max), @@ -163,6 +166,12 @@ SharpLocation TrackSharp(const IR::Inst* inst) { // Retrieve SGPR pair that holds sbase const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive(); const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive(); + while (sbase0->GetOpcode() == IR::Opcode::Phi) { + sbase0 = sbase0->Arg(0).TryInstRecursive(); + } + while (sbase1->GetOpcode() == IR::Opcode::Phi) { + sbase1 = sbase1->Arg(0).TryInstRecursive(); + } ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData && sbase1->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported"); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index b561b712..e4d22cdc 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -10,10 +10,8 @@ #include "video_core/amdgpu/pixel_format.h" #include -#include #include -#include -#include +#include #include #include #include @@ -333,6 +331,14 @@ struct Liverpool { u32 Height() const { return (depth_size.height_tile_max + 1) << 3; } + + u64 Address() const { + return u64(z_read_base) << 8; + } + + [[nodiscard]] size_t GetSizeAligned() const { + return depth_slice.tile_max * 8; + } }; enum class ClipSpace : u32 { @@ -506,8 +512,8 @@ struct Liverpool { float xoffset; float yscale; float yoffset; - float zoffset; float zscale; + float zoffset; }; union ViewportControl { @@ -564,6 +570,7 @@ struct Liverpool { Subtract = 1, Min = 2, Max = 3, + ReverseSubtract = 4, }; BitField<0, 5, BlendFactor> color_src_factor; @@ -612,7 +619,7 @@ struct Liverpool { BitField<0, 2, EndianSwap> endian; BitField<2, 5, DataFormat> format; BitField<7, 1, u32> linear_general; - BitField<8, 2, NumberFormat> number_type; + BitField<8, 3, NumberFormat> number_type; BitField<11, 2, SwapMode> comp_swap; BitField<13, 1, u32> fast_clear; BitField<14, 1, u32> compression; @@ -680,7 +687,7 @@ struct Liverpool { NumberFormat NumFormat() const { // There is a small difference between T# and CB number types, account for it. - return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb + return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb : info.number_type; } }; @@ -717,6 +724,14 @@ struct Liverpool { CbColor7Base = 0xA381, }; + struct PolygonOffset { + float depth_bias; + float front_scale; + float front_offset; + float back_scale; + float back_offset; + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -726,8 +741,8 @@ struct Liverpool { INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16); ComputeProgram cs_program; INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80); - u32 depth_bounds_min; - u32 depth_bounds_max; + float depth_bounds_min; + float depth_bounds_max; u32 stencil_clear; u32 depth_clear; Scissor screen_scissor; @@ -776,7 +791,9 @@ struct Liverpool { IndexBufferType index_buffer_type; INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2); u32 enable_primitive_id; - INSERT_PADDING_WORDS(0xA318 - 0xA2A1 - 1); + INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1); + PolygonOffset poly_offset; + INSERT_PADDING_WORDS(0xA318 - 0xA2DF - 5); ColorBuffer color_buffers[NumColorBuffers]; INSERT_PADDING_WORDS(0xC242 - 0xA390); PrimitiveType primitive_type; @@ -930,6 +947,7 @@ static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); +static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 58e54118..ba2231b0 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -148,10 +148,20 @@ struct Image { } u32 NumLayers() const { - return last_array - base_array + 1; + u32 slices = type == ImageType::Color3D ? 1 : depth.Value() + 1; + if (type == ImageType::Cube) { + slices *= 6; + } + if (pow2pad) { + slices = std::bit_ceil(slices); + } + return slices; } u32 NumLevels() const { + if (type == ImageType::Color2DMsaa || type == ImageType::Color2DMsaaArray) { + return 1; + } return last_level + 1; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index eb319f09..000063d5 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -176,6 +176,8 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { return vk::BlendOp::eMin; case BlendFunc::Max: return vk::BlendOp::eMax; + case BlendFunc::ReverseSubtract: + return vk::BlendOp::eReverseSubtract; default: UNREACHABLE(); } @@ -316,7 +318,23 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) { return vk::Format::eBc7SrgbBlock; } - UNREACHABLE(); + if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc1RgbaUnormBlock; + } + if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc3UnormBlock; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR8G8B8A8Uint; + } + if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR16Sfloat; + } + if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR32Sfloat; + } + UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) { @@ -328,6 +346,14 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format == DepthBuffer::StencilFormat::Invalid) { return vk::Format::eD32Sfloat; } + if (z_format == DepthBuffer::ZFormat::Z16 && + stencil_format == DepthBuffer::StencilFormat::Invalid) { + return vk::Format::eD16Unorm; + } + if (z_format == DepthBuffer::ZFormat::Z16 && + stencil_format == DepthBuffer::StencilFormat::Stencil8) { + return vk::Format::eD16UnormS8Uint; + } UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c92bf7fe..fe464f0f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -64,8 +64,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pVertexAttributeDescriptions = attributes.data(), }; - ASSERT_MSG(key.prim_type != Liverpool::PrimitiveType::RectList || IsEmbeddedVs(), - "Rectangle List primitive type is only supported for embedded VS"); + if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) { + LOG_WARNING(Render_Vulkan, + "Rectangle List primitive type is only supported for embedded VS"); + } const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = LiverpoolToVK::PrimitiveType(key.prim_type), @@ -76,11 +78,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .depthClampEnable = false, .rasterizerDiscardEnable = false, .polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode), - .cullMode = vk::CullModeFlagBits::eNone, /*LiverpoolToVK::CullMode(key.cull_mode),*/ + .cullMode = LiverpoolToVK::CullMode(key.cull_mode), .frontFace = key.front_face == Liverpool::FrontFace::Clockwise ? vk::FrontFace::eClockwise : vk::FrontFace::eCounterClockwise, - .depthBiasEnable = false, + .depthBiasEnable = bool(key.depth_bias_enable), + .depthBiasConstantFactor = key.depth_bias_const_factor, + .depthBiasClamp = key.depth_bias_clamp, + .depthBiasSlopeFactor = key.depth_bias_slope_factor, .lineWidth = 1.0f, }; @@ -103,7 +108,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .extent = {1, 1}, }; + const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { + .negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW, + }; + const vk::PipelineViewportStateCreateInfo viewport_info = { + .pNext = &clip_control, .viewportCount = 1, .pViewports = &viewport, .scissorCount = 1, @@ -150,6 +160,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .writeMask = key.stencil_ref_back.stencil_write_mask, .reference = key.stencil_ref_back.stencil_test_val, }, + .minDepthBounds = key.depth_bounds_min, + .maxDepthBounds = key.depth_bounds_max, }; u32 shader_count = 1; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e93ea6f4..ff512406 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -32,6 +32,12 @@ struct GraphicsPipelineKey { vk::Format depth_format; Liverpool::DepthControl depth; + float depth_bounds_min; + float depth_bounds_max; + float depth_bias_const_factor; + float depth_bias_slope_factor; + float depth_bias_clamp; + u32 depth_bias_enable; Liverpool::StencilControl stencil; Liverpool::StencilRefMask stencil_ref_front; Liverpool::StencilRefMask stencil_ref_back; @@ -39,7 +45,7 @@ struct GraphicsPipelineKey { Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; Liverpool::FrontFace front_face; - u32 pad{}; + Liverpool::ClipSpace clip_space; std::array blend_controls; std::array write_masks; @@ -47,7 +53,6 @@ struct GraphicsPipelineKey { return std::memcmp(this, &key, sizeof(key)) == 0; } }; -static_assert(std::has_unique_object_representations_v); class GraphicsPipeline { public: diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 355d2603..6d19452d 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -155,6 +155,8 @@ bool Instance::CreateDevice() { custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); + add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); @@ -227,6 +229,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ .extendedDynamicState3ColorWriteMask = true, }, + vk::PhysicalDeviceDepthClipControlFeaturesEXT{ + .depthClipControl = true, + }, }; if (!color_write_en) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 11cd5419..441d0b78 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -88,12 +88,26 @@ void PipelineCache::RefreshGraphicsKey() { auto& key = graphics_key; key.depth = regs.depth_control; + key.depth_bounds_min = regs.depth_bounds_min; + key.depth_bounds_max = regs.depth_bounds_max; + key.depth_bias_enable = regs.polygon_control.enable_polygon_offset_back || + regs.polygon_control.enable_polygon_offset_front || + regs.polygon_control.enable_polygon_offset_para; + if (regs.polygon_control.enable_polygon_offset_front) { + key.depth_bias_const_factor = regs.poly_offset.front_offset; + key.depth_bias_slope_factor = regs.poly_offset.front_scale; + } else { + key.depth_bias_const_factor = regs.poly_offset.back_offset; + key.depth_bias_slope_factor = regs.poly_offset.back_scale; + } + key.depth_bias_clamp = regs.poly_offset.depth_bias; key.stencil = regs.stencil_control; key.stencil_ref_front = regs.stencil_ref_front; key.stencil_ref_back = regs.stencil_ref_back; key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); key.cull_mode = regs.polygon_control.CullingMode(); + key.clip_space = regs.clipper_control.clip_space; key.front_face = regs.polygon_control.front_face; const auto& db = regs.depth_buffer; @@ -103,6 +117,9 @@ void PipelineCache::RefreshGraphicsKey() { // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. // We need to do some arrays compaction at this stage + key.color_formats.fill(vk::Format::eUndefined); + key.blend_controls.fill({}); + key.write_masks.fill({}); int remapped_cb{}; for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -112,6 +129,8 @@ void PipelineCache::RefreshGraphicsKey() { key.color_formats[remapped_cb] = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && + !col_buf.info.blend_bypass); key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; ++remapped_cb; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d49e7138..c1340d08 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -41,6 +41,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { boost::container::static_vector color_attachments{}; + vk::RenderingAttachmentInfo depth_attachment{}; + u32 num_depth_attachments{}; for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { const auto& col_buf = regs.color_buffers[col_buf_id]; if (!col_buf) { @@ -57,6 +59,17 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { .storeOp = vk::AttachmentStoreOp::eStore, }); } + if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) { + const auto& image_view = + texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent); + depth_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eGeneral, + .loadOp = vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + }; + num_depth_attachments++; + } // TODO: Don't restart renderpass every draw const auto& scissor = regs.screen_scissor; @@ -69,6 +82,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { .layerCount = 1, .colorAttachmentCount = static_cast(color_attachments.size()), .pColorAttachments = color_attachments.data(), + .pDepthAttachment = num_depth_attachments ? &depth_attachment : nullptr, }; UpdateDynamicState(*pipeline); @@ -78,7 +92,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { - const u32 num_vertices = pipeline->IsEmbeddedVs() ? 4 : regs.num_indices; + const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList + ? 4 + : regs.num_indices; cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0); } cmdbuf.endRendering(); @@ -156,13 +172,15 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { void Rasterizer::UpdateViewportScissorState() { auto& regs = liverpool->regs; + const float reduce_z = + regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW ? 1.0f : 0.0f; const auto cmdbuf = scheduler.CommandBuffer(); const vk::Viewport viewport{ .x = regs.viewports[0].xoffset - regs.viewports[0].xscale, .y = regs.viewports[0].yoffset - regs.viewports[0].yscale, .width = regs.viewports[0].xscale * 2.0f, .height = regs.viewports[0].yscale * 2.0f, - .minDepth = regs.viewports[0].zoffset - regs.viewports[0].zscale, + .minDepth = regs.viewports[0].zoffset - regs.viewports[0].zscale * reduce_z, .maxDepth = regs.viewports[0].zscale + regs.viewports[0].zoffset, }; const vk::Rect2D scissor{ diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 9bf3ec0a..fc11e6cb 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -33,11 +33,23 @@ static vk::Format ConvertPixelFormat(const VideoOutFormat format) { return {}; } +static bool IsDepthStencilFormat(vk::Format format) { + switch (format) { + case vk::Format::eD16Unorm: + case vk::Format::eD16UnormS8Uint: + case vk::Format::eD32Sfloat: + case vk::Format::eD32SfloatS8Uint: + return true; + default: + return false; + } +} + static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) { vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; - if (false /*&& IsDepthStencilFormat(format)*/) { + if (IsDepthStencilFormat(format)) { usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; } else { if (format != vk::Format::eBc3SrgbBlock) { @@ -54,9 +66,9 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Cube: + case AmdGpu::ImageType::Color2DArray: return vk::ImageType::e2D; case AmdGpu::ImageType::Color3D: - case AmdGpu::ImageType::Color2DArray: return vk::ImageType::e3D; default: UNREACHABLE(); @@ -98,6 +110,18 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, guest_size_bytes = buffer.GetSizeAligned(); } +ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { + is_tiled = false; + pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); + type = vk::ImageType::e2D; + size.width = hint.Valid() ? hint.width : buffer.Pitch(); + size.height = hint.Valid() ? hint.height : buffer.Height(); + size.depth = 1; + pitch = size.width; + guest_size_bytes = buffer.GetSizeAligned(); +} + ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { is_tiled = image.IsTiled(); tiling_mode = image.GetTilingMode(); @@ -165,6 +189,13 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, info.usage |= vk::ImageUsageFlagBits::eStorage; } + if (info.pixel_format == vk::Format::eD32Sfloat) { + aspect_mask = vk::ImageAspectFlagBits::eDepth; + } + if (info.pixel_format == vk::Format::eD32SfloatS8Uint) { + aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + } + const vk::ImageCreateInfo image_ci = { .flags = flags, .imageType = info.type, @@ -187,7 +218,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, if (info.is_tiled) { ImageViewInfo view_info; view_info.format = DemoteImageFormatForDetiling(info.pixel_format); - view_for_detiler.emplace(*instance, view_info, image); + view_for_detiler.emplace(*instance, view_info, *this); } Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone); @@ -198,23 +229,25 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags ds return; } - const vk::ImageMemoryBarrier barrier = {.srcAccessMask = access_mask, - .dstAccessMask = dst_mask, - .oldLayout = layout, - .newLayout = dst_layout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }}; + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = access_mask, + .dstAccessMask = dst_mask, + .oldLayout = layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; // Adjust pipieline stage - vk::PipelineStageFlags dst_pl_stage = + const vk::PipelineStageFlags dst_pl_stage = (dst_mask == vk::AccessFlagBits::eTransferRead || dst_mask == vk::AccessFlagBits::eTransferWrite) ? vk::PipelineStageFlagBits::eTransfer diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c357f8a2..64bcfbd3 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -38,6 +38,8 @@ struct ImageInfo { explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; explicit ImageInfo(const AmdGpu::Image& image) noexcept; bool is_tiled = false; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 919415e8..52fb28a4 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -3,6 +3,7 @@ #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image_view.h" namespace VideoCore { @@ -58,7 +59,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept { mapping.a = ConvertComponentSwizzle(image.dst_sel_w); } -ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, vk::Image image, +ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, std::optional usage_override /*= {}*/) : info{info_} { vk::ImageViewUsageCreateInfo usage_ci{}; @@ -66,14 +67,20 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info usage_ci.usage = usage_override.value(); } + // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it. + vk::Format format = info.format; + if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth && format == vk::Format::eR32Sfloat) { + format = vk::Format::eD32Sfloat; + } + const vk::ImageViewCreateInfo image_view_ci = { .pNext = usage_override.has_value() ? &usage_ci : nullptr, - .image = image, + .image = image.image, .viewType = info.type, - .format = info.format, + .format = format, .components = info.mapping, .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, + .aspectMask = image.aspect_mask, .baseMipLevel = 0U, .levelCount = 1, .baseArrayLayer = 0, diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index ae1f9ba0..2e15e1a1 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -28,8 +28,10 @@ struct ImageViewInfo { auto operator<=>(const ImageViewInfo&) const = default; }; +struct Image; + struct ImageView { - explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, vk::Image image, + explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, std::optional usage_override = {}); ~ImageView(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 5c371112..fd6767b7 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -93,7 +93,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& ASSERT(null_id.index == 0); ImageViewInfo view_info; - void(slot_image_views.insert(instance, view_info, slot_images[null_id].image)); + void(slot_image_views.insert(instance, view_info, slot_images[null_id])); } TextureCache::~TextureCache() { @@ -112,7 +112,7 @@ void TextureCache::OnCpuWrite(VAddr address) { }); } -Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { +Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) { std::unique_lock lock{m_page_table}; boost::container::small_vector image_ids; ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { @@ -132,7 +132,8 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { } Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified)) { + if (True(image.flags & ImageFlagBits::CpuModified) && + (!image_ids.empty() || refresh_on_create)) { RefreshImage(image); TrackImage(image, image_id); } @@ -153,8 +154,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi usage_override = image.info.usage & ~vk::ImageUsageFlagBits::eStorage; } - const ImageViewId view_id = - slot_image_views.insert(instance, view_info, image.image, usage_override); + const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, usage_override); image.image_view_infos.emplace_back(view_info); image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; @@ -177,6 +177,7 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; auto& image = FindImage(info, buffer.Address()); + image.flags &= ~ImageFlagBits::CpuModified; image.Transit(vk::ImageLayout::eColorAttachmentOptimal, vk::AccessFlagBits::eColorAttachmentWrite | @@ -187,6 +188,17 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff return RegisterImageView(image, view_info); } +ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint) { + const ImageInfo info{buffer, hint}; + auto& image = FindImage(info, buffer.Address(), false); + image.flags &= ~ImageFlagBits::CpuModified; + + ImageViewInfo view_info; + view_info.format = info.pixel_format; + return RegisterImageView(image, view_info); +} + void TextureCache::RefreshImage(Image& image) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a4dbff73..20ae5bd5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -37,7 +37,8 @@ public: void OnCpuWrite(VAddr address); /// Retrieves the image handle of the image with the provided attributes and address. - [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address); + [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address, + bool refresh_on_create = true); /// Retrieves an image view with the properties of the specified image descriptor. [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image); @@ -45,6 +46,8 @@ public: /// Retrieves the render target with specified properties [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint); + [[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint); /// Reuploads image contents. void RefreshImage(Image& image);