From 8103dde915665f3bdbf71c67ab9e7301bb6c6d05 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 2 Jul 2024 22:52:21 +0300 Subject: [PATCH] shader_recompiler: Vs outputs --- src/core/libraries/gnmdriver/gnmdriver.cpp | 4 +- src/core/libraries/gnmdriver/gnmdriver.h | 2 +- .../spirv/emit_spirv_context_get_set.cpp | 37 +++++++++++++++ .../spirv/emit_spirv_floating_point.cpp | 8 ++++ .../backend/spirv/emit_spirv_image.cpp | 3 +- .../backend/spirv/emit_spirv_instructions.h | 2 + .../backend/spirv/spirv_emit_context.cpp | 22 ++++++++- .../backend/spirv/spirv_emit_context.h | 13 +++-- .../frontend/translate/translate.cpp | 8 ++++ .../frontend/translate/translate.h | 1 + .../frontend/translate/vector_alu.cpp | 12 ++++- src/shader_recompiler/ir/ir_emitter.cpp | 11 +++++ src/shader_recompiler/ir/ir_emitter.h | 1 + src/shader_recompiler/ir/opcodes.inc | 2 + src/shader_recompiler/runtime_info.h | 28 +++++++++++ src/video_core/amdgpu/liverpool.h | 5 ++ .../renderer_vulkan/liverpool_to_vk.cpp | 4 ++ .../renderer_vulkan/vk_compute_pipeline.cpp | 4 +- .../renderer_vulkan/vk_compute_pipeline.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 47 ++++++++++++++++++- .../renderer_vulkan/vk_rasterizer.cpp | 20 ++++++-- src/video_core/texture_cache/image.cpp | 3 ++ src/video_core/texture_cache/image_view.cpp | 4 +- .../texture_cache/texture_cache.cpp | 4 +- 24 files changed, 223 insertions(+), 23 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index cf162db1..f4980975 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -912,9 +912,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { +u64 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; + return 0x800000000; } int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 2971d66b..9eb05135 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp(); int PS4_SYSV_ABI sceGnmGetEqEventType(); int PS4_SYSV_ABI sceGnmGetEqTimeStamp(); int PS4_SYSV_ABI sceGnmGetGpuBlockStatus(); -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); +u64 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); int PS4_SYSV_ABI sceGnmGetGpuInfoStatus(); int PS4_SYSV_ABI sceGnmGetLastWaitedAddress(); int PS4_SYSV_ABI sceGnmGetNumTcaUnits(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e2805a6f..81fb454e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -7,6 +7,37 @@ namespace Shader::Backend::SPIRV { namespace { +Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { + switch (output) { + case VsOutput::ClipDist0: + case VsOutput::ClipDist1: + case VsOutput::ClipDist2: + case VsOutput::ClipDist3: + case VsOutput::ClipDist4: + case VsOutput::ClipDist5: + case VsOutput::ClipDist6: + case VsOutput::ClipDist7: { + const u32 index = u32(output) - u32(VsOutput::ClipDist0); + const Id clip_num{ctx.ConstU32(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); + } + case VsOutput::CullDist0: + case VsOutput::CullDist1: + case VsOutput::CullDist2: + case VsOutput::CullDist3: + case VsOutput::CullDist4: + case VsOutput::CullDist5: + case VsOutput::CullDist6: + case VsOutput::CullDist7: { + const u32 index = u32(output) - u32(VsOutput::CullDist0); + const Id cull_num{ctx.ConstU32(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num); + } + default: + UNREACHABLE(); + } +} + Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; @@ -20,6 +51,12 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { switch (attr) { case IR::Attribute::Position0: { return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element)); + case IR::Attribute::Position1: + case IR::Attribute::Position2: + case IR::Attribute::Position3: { + const u32 index = u32(attr) - u32(IR::Attribute::Position1); + return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]); + } case IR::Attribute::RenderTarget0: case IR::Attribute::RenderTarget1: case IR::Attribute::RenderTarget2: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 18d5f35a..362ec71c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -364,4 +364,12 @@ Id EmitFPIsNan64(EmitContext& ctx, Id value) { return ctx.OpIsNan(ctx.U1[1], value); } +Id EmitFPIsInf32(EmitContext& ctx, Id value) { + return ctx.OpIsInf(ctx.U1[1], value); +} + +Id EmitFPIsInf64(EmitContext& ctx, Id value) { + return ctx.OpIsInf(ctx.U1[1], value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index d063ab55..030352a2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -140,7 +140,8 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); - ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), color); + const Id color_type = texture.data_types->Get(4); + ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), ctx.OpBitcast(color_type, color)); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 6cf87045..97130bf5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -241,6 +241,8 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPIsNan16(EmitContext& ctx, Id value); Id EmitFPIsNan32(EmitContext& ctx, Id value); Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitFPIsInf32(EmitContext& ctx, Id value); +Id EmitFPIsInf64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index f52f67c8..8ccf6b06 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -224,8 +224,17 @@ void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineOutputs(const Info& info) { switch (stage) { - case Stage::Vertex: + case Stage::Vertex: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + const std::array zero{f32_zero_value, f32_zero_value, f32_zero_value, + f32_zero_value, f32_zero_value, f32_zero_value, + f32_zero_value, f32_zero_value}; + const Id type{TypeArray(F32[1], ConstU32(8U))}; + const Id initializer{ConstantComposite(type, zero)}; + clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, + spv::StorageClass::Output, initializer); + cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, + spv::StorageClass::Output, initializer); for (u32 i = 0; i < IR::NumParams; i++) { const IR::Attribute param{IR::Attribute::Param0 + i}; if (!info.stores.GetAny(param)) { @@ -238,6 +247,7 @@ void EmitContext::DefineOutputs(const Info& info) { interfaces.push_back(id); } break; + } case Stage::Fragment: for (u32 i = 0; i < IR::NumRenderTargets; i++) { const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; @@ -319,12 +329,20 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { } if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 && image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { - return spv::ImageFormat::Rg8Snorm; + return spv::ImageFormat::Rg8; } if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 && image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::Rgba16f; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { + return spv::ImageFormat::R8; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { + return spv::ImageFormat::Rgba8; + } UNREACHABLE(); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index e64e2d98..ef789f9f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -66,15 +66,16 @@ public: } template - [[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) { + [[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class, + std::optional initializer = std::nullopt) { const Id pointer_type_id{TypePointer(storage_class, type)}; - return global ? AddGlobalVariable(pointer_type_id, storage_class) - : AddLocalVariable(pointer_type_id, storage_class); + return global ? AddGlobalVariable(pointer_type_id, storage_class, initializer) + : AddLocalVariable(pointer_type_id, storage_class, initializer); } [[nodiscard]] Id DefineVariable(Id type, std::optional builtin, - spv::StorageClass storage_class) { - const Id id{DefineVar(type, storage_class)}; + spv::StorageClass storage_class, std::optional initializer = std::nullopt) { + const Id id{DefineVar(type, storage_class, initializer)}; if (builtin) { Decorate(id, spv::Decoration::BuiltIn, *builtin); } @@ -169,6 +170,8 @@ public: Id frag_depth{}; std::array frag_color{}; std::array frag_num_comp{}; + Id clip_distances{}; + Id cull_distances{}; Id workgroup_id{}; Id local_invocation_id{}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 24dc0495..58fcbd2f 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -377,6 +377,7 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::IMAGE_SAMPLE_LZ: case Opcode::IMAGE_SAMPLE: case Opcode::IMAGE_SAMPLE_L: + case Opcode::IMAGE_SAMPLE_C_O: translator.IMAGE_SAMPLE(inst); break; case Opcode::IMAGE_ATOMIC_ADD: @@ -490,6 +491,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_CMP_NGT_F32: translator.V_CMP_F32(ConditionOp::LE, false, inst); break; + case Opcode::V_CMP_NGE_F32: + translator.V_CMP_F32(ConditionOp::LT, false, inst); + break; case Opcode::S_CMP_LT_U32: translator.S_CMP(ConditionOp::LT, false, inst); break; @@ -811,6 +815,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_CMP_NE_U64: translator.V_CMP_NE_U64(inst); break; + case Opcode::V_CMP_CLASS_F32: + translator.V_CMP_CLASS_F32(inst); + break; case Opcode::V_TRUNC_F32: translator.V_TRUNC_F32(inst); break; @@ -827,6 +834,7 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) translator.S_ADD_U32(inst); break; case Opcode::S_SUB_U32: + case Opcode::S_SUB_I32: translator.S_SUB_U32(inst); break; // TODO: Separate implementation for legacy variants. diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 83148077..fa86719f 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -154,6 +154,7 @@ public: void V_BFREV_B32(const GcnInst& inst); void V_LDEXP_F32(const GcnInst& inst); void V_CVT_FLR_I32_F32(const GcnInst& inst); + void V_CMP_CLASS_F32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index c0766825..2ab13d23 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -50,11 +50,14 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { }; const bool has_flt_source = is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field); - const IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source); + IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source); IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source); if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) { src1 = ir.BitCast(src1); } + if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) { + src0 = ir.BitCast(src0); + } const IR::Value result = ir.Select(flag, src1, src0); ir.SetVectorReg(dst_reg, IR::U32F32{result}); } @@ -513,4 +516,11 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0))); } +void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::U32 src1{GetSrc(inst.src[1])}; + ir.SetVcc(ir.Imm1(false)); + // TODO +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 8c982c98..7e5e46eb 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -831,6 +831,17 @@ U1 IREmitter::FPIsNan(const F32F64& value) { } } +U1 IREmitter::FPIsInf(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPIsInf32, value); + case Type::F64: + return Inst(Opcode::FPIsInf64, value); + default: + ThrowInvalidType(value.Type()); + } +} + U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) { if (lhs.Type() != rhs.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 745e0f4f..a78785d6 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -144,6 +144,7 @@ public: [[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true); [[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true); [[nodiscard]] U1 FPIsNan(const F32F64& value); + [[nodiscard]] U1 FPIsInf(const F32F64& value); [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs); [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs); [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index d2f15336..f7519fb4 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -210,6 +210,8 @@ OPCODE(FPUnordGreaterThanEqual32, U1, F32, OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) OPCODE(FPIsNan32, U1, F32, ) OPCODE(FPIsNan64, U1, F64, ) +OPCODE(FPIsInf32, U1, F32, ) +OPCODE(FPIsInf64, U1, F64, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 9d893002..eeb5db63 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -42,6 +42,33 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 7; +enum class VsOutput : u32 { + None, + PointSprite, + EdgeFlag, + KillFlag, + GsCutFlag, + GsMrtIndex, + GsVpIndex, + CullDist0, + CullDist1, + CullDist2, + CullDist3, + CullDist4, + CullDist5, + CullDist6, + CullDist7, + ClipDist0, + ClipDist1, + ClipDist2, + ClipDist3, + ClipDist4, + ClipDist5, + ClipDist6, + ClipDist7, +}; +using VsOutputMap = std::array; + struct BufferResource { u32 sgpr_base; u32 dword_offset; @@ -123,6 +150,7 @@ struct Info { }; AttributeFlags loads{}; AttributeFlags stores{}; + boost::container::static_vector vs_outputs; BufferResourceList buffers; ImageResourceList images; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 2552cc61..608cc470 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -214,6 +214,10 @@ struct Liverpool { BitField<18, 1, u32> use_vtx_render_target_idx; BitField<19, 1, u32> use_vtx_viewport_idx; BitField<20, 1, u32> use_vtx_kill_flag; + BitField<21, 1, u32> vs_out_misc_enable; + BitField<22, 1, u32> vs_out_ccdist0_enable; + BitField<23, 1, u32> vs_out_ccdist1_enable; + BitField<25, 1, u32> use_vtx_gs_cut_flag; bool IsClipDistEnabled(u32 index) const { return (clip_distance_enable.Value() >> index) & 1; @@ -1027,6 +1031,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40); static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000); static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); +static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010); static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 37d9188c..da7f49f0 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -400,6 +400,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Uint) { return vk::Format::eR32G32Uint; } + if (data_format == AmdGpu::DataFormat::Format4_4_4_4 && + num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR4G4B4A4UnormPack16; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index cfbacd48..0f5a64ce 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -85,8 +85,8 @@ ComputePipeline::~ComputePipeline() = default; bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging, VideoCore::TextureCache& texture_cache) const { // Bind resource buffers and textures. - boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; + boost::container::static_vector buffer_infos; + boost::container::static_vector image_infos; boost::container::small_vector set_writes; u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 1d074814..1a56ae62 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -40,6 +40,7 @@ private: vk::UniquePipeline pipeline; vk::UniquePipelineLayout pipeline_layout; vk::UniqueDescriptorSetLayout desc_layout; + u64 compute_key; Shader::Info info{}; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ab3585d3..5ba9e6a1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -18,6 +18,48 @@ extern std::unique_ptr renderer; namespace Vulkan { +using Shader::VsOutput; + +void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { + const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { + if (x != VsOutput::None || y != VsOutput::None || + z != VsOutput::None || w != VsOutput::None) { + info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); + } + }; + // VS_OUT_MISC_VEC + add_output( + ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, + ctl.use_vtx_edge_flag ? VsOutput::EdgeFlag : + (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), + ctl.use_vtx_kill_flag ? VsOutput::KillFlag : + (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), + ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None + ); + // VS_OUT_CCDIST0 + add_output( + ctl.IsClipDistEnabled(0) ? VsOutput::ClipDist0 : + (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), + ctl.IsClipDistEnabled(1) ? VsOutput::ClipDist1 : + (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), + ctl.IsClipDistEnabled(2) ? VsOutput::ClipDist2 : + (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), + ctl.IsClipDistEnabled(3) ? VsOutput::ClipDist3 : + (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None) + ); + // VS_OUT_CCDIST1 + add_output( + ctl.IsClipDistEnabled(4) ? VsOutput::ClipDist4 : + (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), + ctl.IsClipDistEnabled(5) ? VsOutput::ClipDist5 : + (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), + ctl.IsClipDistEnabled(6) ? VsOutput::ClipDist6 : + (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), + ctl.IsClipDistEnabled(7) ? VsOutput::ClipDist7 : + (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None) + ); +} + Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, const AmdGpu::Liverpool::Regs& regs) { Shader::Info info{}; @@ -26,6 +68,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_d switch (stage) { case Shader::Stage::Vertex: { info.num_user_data = regs.vs_program.settings.num_user_regs; + BuildVsOutputs(info, regs.vs_output_control); break; } case Shader::Stage::Fragment: { @@ -171,13 +214,13 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { // actual draw hence can skip pipeline creation. if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { LOG_TRACE(Render_Vulkan, "FCE pass skipped"); - return {}; + //return {}; } if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) { // TODO: check for a valid MRT1 to promote the draw to the resolve pass. LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped"); - return {}; + //return {}; } u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 806332ca..9a09c1ff 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -44,7 +44,11 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + try { + pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + } catch (...) { + UNREACHABLE(); + } BeginRendering(); UpdateDynamicState(*pipeline); @@ -70,9 +74,13 @@ void Rasterizer::DispatchDirect() { return; } - const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache); - if (!has_resources) { - return; + try { + const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache); + if (!has_resources) { + return; + } + } catch (...) { + UNREACHABLE(); } scheduler.EndRendering(); @@ -129,6 +137,10 @@ void Rasterizer::BeginRendering() { }; texture_cache.TouchMeta(htile_address, false); state.num_depth_attachments++; + } else { + if (regs.depth_render_control.depth_compress_disable) { + LOG_WARNING(Render_Vulkan, "No depth buffer bound with dcc"); + } } scheduler.BeginRendering(state); } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f06492ef..d34e8ce1 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -221,6 +221,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { + if (cpu_addr == 2990538752ULL) { + printf("bad\n"); + } ASSERT(info.pixel_format != vk::Format::eUndefined); vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 4fc0589c..e9f79b2b 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -77,7 +77,9 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info if (usage_override) { usage_ci.usage = usage_override.value(); } - + if (info.format == vk::Format::eR32Sfloat) { + printf("stop\n"); + } // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it. vk::Format format = info.format; vk::ImageAspectFlags aspect = image.aspect_mask; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 7ea610db..0a10fa80 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -127,14 +127,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r image_ids.push_back(image_id); }); - ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!"); + //ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!"); ImageId image_id{}; if (image_ids.empty()) { image_id = slot_images.insert(instance, scheduler, info, cpu_address); RegisterImage(image_id); } else { - image_id = image_ids[0]; + image_id = image_ids[image_ids.size() > 1 ? 1 : 0]; } RegisterMeta(info, image_id);