From b387ce9bbd6f234c5c99e867f36b93f51a8f63b1 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 7 Aug 2024 19:44:10 +0300 Subject: [PATCH] video_core: Use multi level page table for caches --- CMakeLists.txt | 4 +- .../object_pool.h | 4 +- .../libraries/kernel/thread_management.cpp | 17 +---- src/core/libraries/pad/pad.cpp | 4 +- .../frontend/control_flow_graph.cpp | 2 +- .../frontend/control_flow_graph.h | 6 +- src/shader_recompiler/frontend/module.h | 10 --- .../frontend/structured_control_flow.cpp | 22 ++++--- .../frontend/structured_control_flow.h | 5 +- src/shader_recompiler/ir/basic_block.cpp | 2 +- src/shader_recompiler/ir/basic_block.h | 6 +- .../ir/passes/resource_tracking_pass.cpp | 12 ++++ src/shader_recompiler/recompiler.cpp | 8 +-- src/shader_recompiler/recompiler.h | 6 +- src/video_core/buffer_cache/buffer_cache.h | 12 +++- src/video_core/buffer_cache/range_set.cpp | 0 src/video_core/multi_level_page_table.h | 65 +++++++++++++++++++ src/video_core/page_manager.cpp | 51 +++++++++++++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 35 +++++----- .../renderer_vulkan/vk_instance.cpp | 12 +++- .../renderer_vulkan/vk_pipeline_cache.h | 5 +- .../renderer_vulkan/vk_rasterizer.cpp | 1 + .../renderer_vulkan/vk_rasterizer.h | 1 + .../texture_cache/texture_cache.cpp | 6 +- src/video_core/texture_cache/texture_cache.h | 24 +++---- src/video_core/texture_cache/tile_manager.cpp | 10 ++- 26 files changed, 229 insertions(+), 101 deletions(-) rename src/{shader_recompiler => common}/object_pool.h (98%) delete mode 100644 src/shader_recompiler/frontend/module.h delete mode 100644 src/video_core/buffer_cache/range_set.cpp create mode 100644 src/video_core/multi_level_page_table.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a05acf5..fbba0359 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -283,6 +283,7 @@ set(COMMON src/common/logging/backend.cpp src/common/native_clock.h src/common/path_util.cpp 
src/common/path_util.h + src/common/object_pool.h src/common/polyfill_thread.h src/common/rdtsc.cpp src/common/rdtsc.h @@ -368,7 +369,6 @@ set(CORE src/core/aerolib/stubs.cpp ) set(SHADER_RECOMPILER src/shader_recompiler/exception.h - src/shader_recompiler/object_pool.h src/shader_recompiler/profile.h src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.h @@ -457,7 +457,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/buffer_cache.cpp src/video_core/buffer_cache/buffer_cache.h src/video_core/buffer_cache/memory_tracker_base.h - src/video_core/buffer_cache/range_set.cpp src/video_core/buffer_cache/range_set.h src/video_core/buffer_cache/word_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -507,6 +506,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/texture_cache/types.h src/video_core/page_manager.cpp src/video_core/page_manager.h + src/video_core/multi_level_page_table.h src/video_core/renderdoc.cpp src/video_core/renderdoc.h ) diff --git a/src/shader_recompiler/object_pool.h b/src/common/object_pool.h similarity index 98% rename from src/shader_recompiler/object_pool.h rename to src/common/object_pool.h index 1398898a..9e25e0c4 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/common/object_pool.h @@ -8,7 +8,7 @@ #include #include -namespace Shader { +namespace Common { template requires std::is_destructible_v @@ -104,4 +104,4 @@ private: size_t new_chunk_size{}; }; -} // namespace Shader +} // namespace Common diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 1b4c48fb..19ce1bdd 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -465,7 +465,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) { int result = pthread_mutex_destroy(&(*mutex)->pth_mutex); - LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, 
result); + LOG_DEBUG(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); delete *mutex; *mutex = nullptr; @@ -725,7 +725,7 @@ int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) { } int result = pthread_cond_destroy(&(*cond)->cond); - LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result); + LOG_DEBUG(Kernel_Pthread, "scePthreadCondDestroy, result={}", result); delete *cond; *cond = nullptr; @@ -811,8 +811,6 @@ int PS4_SYSV_ABI posix_pthread_cond_timedwait(ScePthreadCond* cond, ScePthreadMu } int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) { - LOG_INFO(Kernel_Pthread, - "posix posix_pthread_cond_broadcast redirect to scePthreadCondBroadcast"); int result = scePthreadCondBroadcast(cond); if (result != 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -824,7 +822,6 @@ int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) { } int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) { - // LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit"); int result = scePthreadMutexattrInit(attr); if (result < 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -836,7 +833,6 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) { } int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) { - // LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit"); int result = scePthreadMutexattrSettype(attr, type); if (result < 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -861,7 +857,6 @@ int PS4_SYSV_ABI posix_pthread_once(pthread_once_t* once_control, void (*init_ro int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) { int result = scePthreadMutexattrSetprotocol(attr, protocol); - LOG_INFO(Kernel_Pthread, "redirect to 
scePthreadMutexattrSetprotocol: result = {}", result); if (result < 0) { UNREACHABLE(); } @@ -1304,8 +1299,6 @@ int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int det int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthreadAttr* attr, PthreadEntryFunc start_routine, void* arg, const char* name) { - LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate: name = {}", name); - int result = scePthreadCreate(thread, attr, start_routine, arg, name); if (result != 0) { int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP @@ -1352,17 +1345,11 @@ int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadC int PS4_SYSV_ABI posix_pthread_cond_signal(ScePthreadCond* cond) { int result = scePthreadCondSignal(cond); - LOG_INFO(Kernel_Pthread, - "posix posix_pthread_cond_signal redirect to scePthreadCondSignal, result = {}", - result); return result; } int PS4_SYSV_ABI posix_pthread_cond_destroy(ScePthreadCond* cond) { int result = scePthreadCondDestroy(cond); - LOG_INFO(Kernel_Pthread, - "posix posix_pthread_cond_destroy redirect to scePthreadCondDestroy, result = {}", - result); return result; } diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index e318e152..064c71b8 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -470,7 +470,7 @@ int PS4_SYSV_ABI scePadSetUserColor() { } int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) { - LOG_ERROR(Lib_Pad, "(STUBBED) called"); + LOG_DEBUG(Lib_Pad, "(STUBBED) called"); return ORBIS_OK; } @@ -665,4 +665,4 @@ void RegisterlibScePad(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("7xA+hFtvBCA", "libScePad", 1, "libScePad", 1, 1, Func_EF103E845B6F0420); }; -} // namespace Libraries::Pad \ No newline at end of file +} // namespace Libraries::Pad diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp 
b/src/shader_recompiler/frontend/control_flow_graph.cpp index 5eadae1b..2925c05d 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -40,7 +40,7 @@ static IR::Condition MakeCondition(Opcode opcode) { } } -CFG::CFG(ObjectPool& block_pool_, std::span inst_list_) +CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) : block_pool{block_pool_}, inst_list{inst_list_} { index_to_pc.resize(inst_list.size() + 1); EmitLabels(); diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index 07190087..ebe614ee 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -8,10 +8,10 @@ #include #include +#include "common/object_pool.h" #include "common/types.h" #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/condition.h" -#include "shader_recompiler/object_pool.h" namespace Shader::Gcn { @@ -49,7 +49,7 @@ class CFG { using Label = u32; public: - explicit CFG(ObjectPool& block_pool, std::span inst_list); + explicit CFG(Common::ObjectPool& block_pool, std::span inst_list); [[nodiscard]] std::string Dot() const; @@ -59,7 +59,7 @@ private: void LinkBlocks(); public: - ObjectPool& block_pool; + Common::ObjectPool& block_pool; std::span inst_list; std::vector index_to_pc; boost::container::small_vector labels; diff --git a/src/shader_recompiler/frontend/module.h b/src/shader_recompiler/frontend/module.h deleted file mode 100644 index 3901f021..00000000 --- a/src/shader_recompiler/frontend/module.h +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -namespace Shader::Gcn { - -void Translate(); - -} // namespace Shader::Gcn \ No newline at end of file diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp 
b/src/shader_recompiler/frontend/structured_control_flow.cpp index c8d73858..b50205d4 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -287,7 +287,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { */ class GotoPass { public: - explicit GotoPass(CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { + explicit GotoPass(CFG& cfg, Common::ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; const auto end{gotos.rend()}; for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { @@ -563,7 +563,7 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - ObjectPool& pool; + Common::ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -597,8 +597,9 @@ private: class TranslatePass { public: - TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, - ObjectPool& stmt_pool_, Statement& root_stmt, + TranslatePass(Common::ObjectPool& inst_pool_, + Common::ObjectPool& block_pool_, + Common::ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, Info& info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, @@ -808,9 +809,9 @@ private: return block_pool.Create(inst_pool); } - ObjectPool& stmt_pool; - ObjectPool& inst_pool; - ObjectPool& block_pool; + Common::ObjectPool& stmt_pool; + Common::ObjectPool& inst_pool; + Common::ObjectPool& block_pool; IR::AbstractSyntaxList& syntax_list; const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; @@ -819,9 +820,10 @@ private: }; } // Anonymous namespace -IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, - CFG& cfg, Info& info, const Profile& profile) { - ObjectPool stmt_pool{64}; +IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, CFG& cfg, Info& info, + const Profile& profile) { + 
Common::ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index da4ef1ff..f5a54051 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -7,7 +7,6 @@ #include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/value.h" -#include "shader_recompiler/object_pool.h" namespace Shader { struct Info; @@ -16,8 +15,8 @@ struct Profile; namespace Shader::Gcn { -[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, - ObjectPool& block_pool, CFG& cfg, +[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, CFG& cfg, Info& info, const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index 622a6249..60ba0647 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -9,7 +9,7 @@ namespace Shader::IR { -Block::Block(ObjectPool& inst_pool_) : inst_pool{&inst_pool_} {} +Block::Block(Common::ObjectPool& inst_pool_) : inst_pool{&inst_pool_} {} Block::~Block() = default; diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 5a7036c6..1eb11469 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -9,10 +9,10 @@ #include #include +#include "common/object_pool.h" #include "common/types.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/value.h" -#include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -25,7 +25,7 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using 
const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(ObjectPool& inst_pool_); + explicit Block(Common::ObjectPool& inst_pool_); ~Block(); Block(const Block&) = delete; @@ -153,7 +153,7 @@ public: private: /// Memory pool for instruction list - ObjectPool* inst_pool; + Common::ObjectPool* inst_pool; /// List of instructions in this block InstructionList instructions; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 169f6da0..bc1eb409 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -433,6 +433,18 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, return; } + if (IsLoadBufferFormat(inst)) { + if (UseFP16(buffer.GetDataFmt(), buffer.GetNumberFmt())) { + info.uses_fp16 = true; + } + } else { + const u32 stride = buffer.GetStride(); + if (stride < 4) { + LOG_WARNING(Render_Vulkan, + "non-formatting load_buffer_* is not implemented for stride {}", stride); + } + } + // Compute address of the buffer using the stride. // Todo: What if buffer is rebound with different stride? IR::U32 address = ir.Imm32(inst_info.inst_offset.Value()); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 392ec772..69eec50f 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -27,9 +27,9 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - std::span token, const Info&& info, - const Profile& profile) { +IR::Program TranslateProgram(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, std::span token, + const Info&& info, const Profile& profile) { // Ensure first instruction is expected. 
constexpr u32 token_mov_vcchi = 0xBEEB03FF; ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); @@ -45,7 +45,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool gcn_block_pool{64}; + Common::ObjectPool gcn_block_pool{64}; Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 297d4158..34e958a1 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -3,16 +3,16 @@ #pragma once +#include "common/object_pool.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" -#include "shader_recompiler/object_pool.h" namespace Shader { struct Profile; -[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, +[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool& inst_pool, + Common::ObjectPool& block_pool, std::span code, const Info&& info, const Profile& profile); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 1a99b2b3..0dee87cf 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -13,7 +13,7 @@ #include "common/types.h" #include "video_core/buffer_cache/buffer.h" #include "video_core/buffer_cache/memory_tracker_base.h" -#include "video_core/buffer_cache/range_set.h" +#include "video_core/multi_level_page_table.h" namespace AmdGpu { struct Liverpool; @@ -37,6 +37,14 @@ public: static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; static constexpr u64 DEVICE_PAGESIZE = 4_KB; + struct Traits { + using Entry = BufferId; + static constexpr size_t AddressSpaceBits = 39; + static constexpr size_t FirstLevelBits = 14; + static constexpr size_t PageBits = CACHING_PAGEBITS; + }; + using PageTable = MultiLevelPageTable; + struct OverlapResult { 
boost::container::small_vector ids; VAddr begin; @@ -115,7 +123,7 @@ private: std::recursive_mutex mutex; Common::SlotVector slot_buffers; MemoryTracker memory_tracker; - std::array> CACHING_PAGEBITS)> page_table; + PageTable page_table; }; } // namespace VideoCore diff --git a/src/video_core/buffer_cache/range_set.cpp b/src/video_core/buffer_cache/range_set.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/src/video_core/multi_level_page_table.h b/src/video_core/multi_level_page_table.h new file mode 100644 index 00000000..527476f3 --- /dev/null +++ b/src/video_core/multi_level_page_table.h @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#include "common/object_pool.h" +#include "common/types.h" + +namespace VideoCore { + +template +class MultiLevelPageTable final { + using Entry = typename Traits::Entry; + + static constexpr size_t AddressSpaceBits = Traits::AddressSpaceBits; + static constexpr size_t FirstLevelBits = Traits::FirstLevelBits; + static constexpr size_t PageBits = Traits::PageBits; + static constexpr size_t FirstLevelShift = AddressSpaceBits - FirstLevelBits; + static constexpr size_t SecondLevelBits = FirstLevelShift - PageBits; + static constexpr size_t NumEntriesPerL1Page = 1ULL << SecondLevelBits; + + using L1Page = std::array; + +public: + explicit MultiLevelPageTable() : first_level_map{1ULL << FirstLevelBits, nullptr} {} + + ~MultiLevelPageTable() noexcept = default; + + [[nodiscard]] Entry* find(size_t page) { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if (!first_level_map[l1_page]) { + return nullptr; + } + return &(*first_level_map[l1_page])[l2_page]; + } + + [[nodiscard]] const Entry& operator[](size_t page) const { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if 
(!first_level_map[l1_page]) { + first_level_map[l1_page] = page_alloc.Create(); + } + return (*first_level_map[l1_page])[l2_page]; + } + + [[nodiscard]] Entry& operator[](size_t page) { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if (!first_level_map[l1_page]) { + first_level_map[l1_page] = page_alloc.Create(); + } + return (*first_level_map[l1_page])[l2_page]; + } + +private: + mutable std::vector<L1Page*> first_level_map{}; // Filled lazily, also by the const operator[] + mutable Common::ObjectPool<L1Page> page_alloc; +}; + +} // namespace VideoCore diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 3ec12203..10a2e5ff 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -22,7 +22,44 @@ namespace VideoCore { constexpr size_t PAGESIZE = 4_KB; constexpr size_t PAGEBITS = 12; -#ifdef SHADPS4_USERFAULTFD +#ifdef _WIN64 +struct PageManager::Impl { + Impl(Vulkan::Rasterizer* rasterizer_) { + rasterizer = rasterizer_; + + veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler); + ASSERT_MSG(veh_handle, "Failed to register an exception handler"); + } + + void OnMap(VAddr address, size_t size) {} + + void OnUnmap(VAddr address, size_t size) {} + + void Protect(VAddr address, size_t size, bool allow_write) { + DWORD prot = allow_write ? 
PAGE_READWRITE : PAGE_READONLY; // VirtualProtect takes PAGE_* constants, not POSIX PROT_* + DWORD old_prot{}; + BOOL result = VirtualProtect(std::bit_cast<LPVOID>(address), size, prot, &old_prot); + ASSERT_MSG(result != 0, "Region protection failed"); + } + + static LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept { + const u32 ec = pExp->ExceptionRecord->ExceptionCode; + if (ec == EXCEPTION_ACCESS_VIOLATION) { + const auto info = pExp->ExceptionRecord->ExceptionInformation; + if (info[0] == 1) { // Write violation + rasterizer->InvalidateMemory(info[1], sizeof(u64)); + return EXCEPTION_CONTINUE_EXECUTION; + } /* else { + UNREACHABLE(); + }*/ + } + return EXCEPTION_CONTINUE_SEARCH; // pass further + } + + inline static Vulkan::Rasterizer* rasterizer; + void* veh_handle{}; +}; +#elif ENABLE_USERFAULTFD struct PageManager::Impl { Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); @@ -121,9 +158,15 @@ struct PageManager::Impl { Impl(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; +#ifdef __APPLE__ + // Read-only memory write results in SIGBUS on Apple. 
+ static constexpr int SignalType = SIGBUS; +#else + static constexpr int SignalType = SIGSEGV; +#endif sigset_t signal_mask; sigemptyset(&signal_mask); - sigaddset(&signal_mask, SIGSEGV); + sigaddset(&signal_mask, SignalType); using HandlerType = decltype(sigaction::sa_sigaction); @@ -131,7 +174,7 @@ struct PageManager::Impl { guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK; guest_access_fault.sa_sigaction = &GuestFaultSignalHandler; guest_access_fault.sa_mask = signal_mask; - sigaction(SIGSEGV, &guest_access_fault, nullptr); + sigaction(SignalType, &guest_access_fault, nullptr); } void OnMap(VAddr address, size_t size) {} @@ -148,7 +191,7 @@ struct PageManager::Impl { const VAddr address = reinterpret_cast(info->si_addr); const greg_t err = ctx->uc_mcontext.gregs[REG_ERR]; if (err & 0x2) { - rasterizer->InvalidateMemory(address, PAGESIZE); + rasterizer->InvalidateMemory(address, sizeof(u64)); } else { // Read not supported! UNREACHABLE(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c9f76124..d64d382b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -346,19 +346,26 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } for (const auto& buffer : stage.buffers) { const auto vsharp = buffer.GetVsharp(stage); - const VAddr address = vsharp.base_address; - const u32 size = vsharp.GetSize(); - const u32 alignment = - buffer.is_storage ? 
instance.StorageMinAlignment() : instance.UniformMinAlignment(); - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(address, size, buffer.is_written); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - if (adjust != 0) { - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding, adjust); + if (vsharp) { + const VAddr address = vsharp.base_address; + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)"); + } + const u32 size = vsharp.GetSize(); + const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment() + : instance.UniformMinAlignment(); + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(address, size, buffer.is_written); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + if (adjust != 0) { + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding, adjust); + } + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); + } else { + buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); } - buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, .dstBinding = binding++, @@ -368,10 +375,6 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); - - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)"); - } } boost::container::static_vector tsharps; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 735303a3..2d396daf 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -204,7 +204,8 @@ bool Instance::CreateDevice() { // The next two extensions are required to be 
available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. @@ -303,12 +304,19 @@ bool Instance::CreateDevice() { .workgroupMemoryExplicitLayoutScalarBlockLayout = true, .workgroupMemoryExplicitLayout8BitAccess = true, .workgroupMemoryExplicitLayout16BitAccess = true, - }}; + }, + vk::PhysicalDeviceRobustness2FeaturesEXT{ + .nullDescriptor = true, + }, + }; if (!color_write_en) { device_chain.unlink(); device_chain.unlink(); } + if (!robustness) { + device_chain.unlink(); + } try { device = physical_device.createDeviceUnique(device_chain.get()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index a77b298b..d41723ec 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -5,7 +5,6 @@ #include #include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -51,8 +50,8 @@ private: Shader::Profile profile{}; GraphicsPipelineKey graphics_key{}; u64 compute_key{}; - Shader::ObjectPool inst_pool; - Shader::ObjectPool block_pool; + Common::ObjectPool inst_pool; + Common::ObjectPool block_pool; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp 
index 34807323..51de09f7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -23,6 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, liverpool->BindRasterizer(this); } memory->SetRasterizer(this); + wfi_event = instance.GetDevice().createEventUnique({}); } Rasterizer::~Rasterizer() = default; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7a2d105b..685ba6e0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -60,6 +60,7 @@ private: AmdGpu::Liverpool* liverpool; Core::MemoryManager* memory; PipelineCache pipeline_cache; + vk::UniqueEvent wfi_event; }; } // namespace Vulkan diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index bffa6eff..0070eb23 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -300,11 +300,11 @@ void TextureCache::UnregisterImage(ImageId image_id) { image.flags &= ~ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift); + if (page_it == nullptr) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift); return; } - auto& image_ids = page_it.value(); + auto& image_ids = *page_it; const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a5e3210d..5753907e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ 
b/src/video_core/texture_cache/texture_cache.h @@ -9,6 +9,7 @@ #include "common/slot_vector.h" #include "video_core/amdgpu/resource.h" +#include "video_core/multi_level_page_table.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image_view.h" @@ -25,12 +26,13 @@ class BufferCache; class PageManager; class TextureCache { - // This is the page shift for adding images into the hash map. It isn't related to - // the page size of the guest or the host and is chosen for convenience. A number too - // small will increase the number of hash map lookups per image, while too large will - // increase the number of images per page. - static constexpr u64 PageBits = 22; - static constexpr u64 PageMask = (1ULL << PageBits) - 1; + struct Traits { + using Entry = boost::container::small_vector; + static constexpr size_t AddressSpaceBits = 39; + static constexpr size_t FirstLevelBits = 9; + static constexpr size_t PageBits = 22; + }; + using PageTable = MultiLevelPageTable; public: explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, @@ -97,8 +99,8 @@ private: template static void ForEachPage(PAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PageBits; - for (u64 page = addr >> PageBits; page <= page_end; ++page) { + const u64 page_end = (addr + size - 1) >> Traits::PageBits; + for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) { if constexpr (RETURNS_BOOL) { if (func(page)) { break; @@ -116,14 +118,14 @@ private: boost::container::small_vector images; ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); - if (it == page_table.end()) { + if (it == nullptr) { if constexpr (BOOL_BREAK) { return false; } else { return; } } - for (const ImageId image_id : it->second) { + for (const ImageId 
image_id : *it) { Image& image = slot_images[image_id]; if (image.flags & ImageFlagBits::Picked) { continue; @@ -175,7 +177,7 @@ private: Common::SlotVector slot_images; Common::SlotVector slot_image_views; tsl::robin_map samplers; - tsl::robin_pg_map> page_table; + PageTable page_table; boost::icl::interval_map cached_pages; std::mutex mutex; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index bb7ad22e..d3a7d796 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -202,12 +202,20 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eBc5UnormBlock: case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7UnormBlock: + case vk::Format::eBc6HUfloatBlock: case vk::Format::eR32G32B32A32Sfloat: return vk::Format::eR32G32B32A32Uint; default: break; } - LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format)); + + // Log missing formats only once (index folded into the table) to avoid spamming the log. + static constexpr size_t MaxFormatIndex = 256; + static std::array<bool, MaxFormatIndex> logged_formats{}; + if (const u32 index = u32(format) % MaxFormatIndex; !logged_formats[index]) { + LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format)); + logged_formats[index] = true; + } return format; }