From 4d728e943d6df4ed0de5234a0cf3f082db2fd1e4 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 27 May 2024 22:12:49 +0300 Subject: [PATCH] video_core: Address some feedback --- src/core/libraries/gnmdriver/gnmdriver.cpp | 32 +++++++++---------- .../backend/spirv/spirv_emit_context.cpp | 4 +-- src/video_core/amdgpu/liverpool.h | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++ .../renderer_vulkan/vk_graphics_pipeline.h | 5 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 3 ++ .../renderer_vulkan/vk_rasterizer.cpp | 7 ++-- 8 files changed, 34 insertions(+), 24 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 67558945..a2358955 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -888,29 +888,27 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, // A fullscreen triangle with one uv set // clang-format off constexpr static std::array shader_code alignas(256) = { - 0xbeeb03ffu, 0x00000009u, // s_mov_b32 vcc_hi, lit(9) - 0x36020081u, // v_and_b32 v1, 1, v0 - 0x36000082u, // v_and_b32 v0, 2, v0 - 0x7e000d00u, // v_cvt_f32_u32 v0, v0 - 0x7e040d01u, // v_cvt_f32_u32 v2, v1 - 0xd2820003u, 0x3ce00f4u, // v_mad_f32 v3, 2.0, v0, -1.0 - 0xd2820004u, 0x3ce04f6u, // v_mad_f32 v4, 4.0, v2, -1.0 - 0x7e020280u, // v_mov_b32 v1, 0 - 0x7e0a02f2u, // v_mov_b32 v5, 1.0 - 0xf80008cfu, 0x5010403u, // exp pos0, v3, v4, v1, v5 done - 0x100404f4u, // v_mul_f32 v2, 2.0, v2 - 0xf800020fu, 0x1010200u, // exp param0, v0, v2, v1, v1 - 0xbf810000u, // s_endpgm - 0x302u, - 0x46d611cu, + 0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007 + 0x36020081u, // v_and_b32 v1, 1, v0 + 0x34020281u, // v_lshlrev_b32 v1, 1, v1 + 0x360000c2u, // v_and_b32 v0, -2, v0 + 0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1 + 0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0 + 0x7e020b01u, // v_cvt_f32_i32 v1, v1 + 0x7E000B00U, + 0x7e040280u, // v_cvt_f32_i32 v0, v0 + 0x7e0602f2u, // v_mov_b32 v3, 1.0 + 0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done + 0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3 + 0xbf810000u, // s_endpgm // OrbShdr header - 0x5362724fu, 0x7726468u, 0x4845u, 0x5080002u, 0xd1e7de61u, 0x0u, 0xb9cae598u, + 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, + 0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u, }; // clang-format on const auto shader_addr = uintptr_t(shader_code.data()); // Original address is 0xfe000f10 - ASSERT((shader_addr & 0xFF) == 0); const static u32 vs_regs[] = { u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7}; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4b9e696e..a2a0fc9a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -237,9 +237,9 @@ void EmitContext::DefineBuffers(const Info& info) { ASSERT(buffer.stride % sizeof(float) == 0); const u32 num_elements = buffer.stride * buffer.num_records / sizeof(float); const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))}; - Decorate(record_array_type, spv::Decoration::ArrayStride, sizeof(float)); - const Id struct_type{TypeStruct(record_array_type)}; + Decorate(record_array_type, spv::Decoration::ArrayStride, 4); + const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); Name(struct_type, name); Decorate(struct_type, spv::Decoration::Block); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9033fbab..1ddf4fc9 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -65,7 +65,7 @@ struct Liverpool { } std::span Code() const { - u32 code_size = 1; + u32 code_size = 0; const u32* code = Address(); static constexpr std::string_view PostHeader = "OrbShdr"; while (std::memcmp(code + code_size, PostHeader.data(), PostHeader.size()) != 0) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 315015cc..b15760ba 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -80,7 +80,7 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; case Liverpool::PrimitiveType::RectList: - return vk::PrimitiveTopology::eTriangleList; + return vk::PrimitiveTopology::eTriangleStrip; default: UNREACHABLE(); return vk::PrimitiveTopology::eTriangleList; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index bede2225..1815224c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -62,6 +62,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pVertexAttributeDescriptions = attributes.data(), }; + ASSERT_MSG(key.prim_type != Liverpool::PrimitiveType::RectList || IsEmbeddedVs(), + "Rectangle List primitive type is only supported for embedded VS"); + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = LiverpoolToVK::PrimitiveType(key.prim_type), .primitiveRestartEnable = false, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 95accfbd..fc8b4fa1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -61,6 +61,11 @@ public: return *pipeline; } + [[nodiscard]] bool IsEmbeddedVs() const noexcept { + static constexpr size_t EmbeddedVsHash = 0x59c556606a027efd; + return key.stage_hashes[0] == EmbeddedVsHash; + } + private: void BuildDescSetLayout(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 14b5227e..54f81267 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -145,6 +145,9 @@ void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stag std::string_view ext) { using namespace Common::FS; const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } const auto filename = fmt::format("{}_{:#X}.{}", stage, hash, ext); const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; file.WriteSpan(code); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index cd52d796..aea93487 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -22,7 +22,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool}, - vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} { + vertex_index_buffer{instance, scheduler, VertexIndexFlags, 32_MB} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } @@ -63,7 +63,8 @@ void Rasterizer::Draw(bool is_indexed) { if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { - cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), 0, 0); + const u32 num_vertices = pipeline->IsEmbeddedVs() ? 4 : regs.num_indices; + cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0); } cmdbuf.endRendering(); } @@ -97,7 +98,7 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { // Upload index data to stream buffer. const auto index_address = regs.index_base_address.Address(); - const u32 index_buffer_size = regs.num_indices * 4; + const u32 index_buffer_size = regs.num_indices * index_size; const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size); std::memcpy(data, index_address, index_buffer_size); vertex_index_buffer.Commit(index_buffer_size);