video_core: Use multi level page table for caches

This commit is contained in:
IndecisiveTurtle 2024-08-07 19:44:10 +03:00
parent efcabbfd2a
commit b387ce9bbd
26 changed files with 229 additions and 101 deletions

View File

@ -283,6 +283,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/native_clock.h src/common/native_clock.h
src/common/path_util.cpp src/common/path_util.cpp
src/common/path_util.h src/common/path_util.h
src/common/object_pool.h
src/common/polyfill_thread.h src/common/polyfill_thread.h
src/common/rdtsc.cpp src/common/rdtsc.cpp
src/common/rdtsc.h src/common/rdtsc.h
@ -368,7 +369,6 @@ set(CORE src/core/aerolib/stubs.cpp
) )
set(SHADER_RECOMPILER src/shader_recompiler/exception.h set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/object_pool.h
src/shader_recompiler/profile.h src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.cpp
src/shader_recompiler/recompiler.h src/shader_recompiler/recompiler.h
@ -457,7 +457,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/buffer_cache/buffer_cache.cpp src/video_core/buffer_cache/buffer_cache.cpp
src/video_core/buffer_cache/buffer_cache.h src/video_core/buffer_cache/buffer_cache.h
src/video_core/buffer_cache/memory_tracker_base.h src/video_core/buffer_cache/memory_tracker_base.h
src/video_core/buffer_cache/range_set.cpp
src/video_core/buffer_cache/range_set.h src/video_core/buffer_cache/range_set.h
src/video_core/buffer_cache/word_manager.h src/video_core/buffer_cache/word_manager.h
src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@ -507,6 +506,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/texture_cache/types.h src/video_core/texture_cache/types.h
src/video_core/page_manager.cpp src/video_core/page_manager.cpp
src/video_core/page_manager.h src/video_core/page_manager.h
src/video_core/multi_level_page_table.h
src/video_core/renderdoc.cpp src/video_core/renderdoc.cpp
src/video_core/renderdoc.h src/video_core/renderdoc.h
) )

View File

@ -8,7 +8,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
namespace Shader { namespace Common {
template <typename T> template <typename T>
requires std::is_destructible_v<T> requires std::is_destructible_v<T>
@ -104,4 +104,4 @@ private:
size_t new_chunk_size{}; size_t new_chunk_size{};
}; };
} // namespace Shader } // namespace Common

View File

@ -465,7 +465,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) {
int result = pthread_mutex_destroy(&(*mutex)->pth_mutex); int result = pthread_mutex_destroy(&(*mutex)->pth_mutex);
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); LOG_DEBUG(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
delete *mutex; delete *mutex;
*mutex = nullptr; *mutex = nullptr;
@ -725,7 +725,7 @@ int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
} }
int result = pthread_cond_destroy(&(*cond)->cond); int result = pthread_cond_destroy(&(*cond)->cond);
LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result); LOG_DEBUG(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
delete *cond; delete *cond;
*cond = nullptr; *cond = nullptr;
@ -811,8 +811,6 @@ int PS4_SYSV_ABI posix_pthread_cond_timedwait(ScePthreadCond* cond, ScePthreadMu
} }
int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) { int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) {
LOG_INFO(Kernel_Pthread,
"posix posix_pthread_cond_broadcast redirect to scePthreadCondBroadcast");
int result = scePthreadCondBroadcast(cond); int result = scePthreadCondBroadcast(cond);
if (result != 0) { if (result != 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -824,7 +822,6 @@ int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) {
} }
int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) { int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
int result = scePthreadMutexattrInit(attr); int result = scePthreadMutexattrInit(attr);
if (result < 0) { if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -836,7 +833,6 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
} }
int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) { int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit");
int result = scePthreadMutexattrSettype(attr, type); int result = scePthreadMutexattrSettype(attr, type);
if (result < 0) { if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -861,7 +857,6 @@ int PS4_SYSV_ABI posix_pthread_once(pthread_once_t* once_control, void (*init_ro
int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) { int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) {
int result = scePthreadMutexattrSetprotocol(attr, protocol); int result = scePthreadMutexattrSetprotocol(attr, protocol);
LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", result);
if (result < 0) { if (result < 0) {
UNREACHABLE(); UNREACHABLE();
} }
@ -1304,8 +1299,6 @@ int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int det
int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthreadAttr* attr, int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthreadAttr* attr,
PthreadEntryFunc start_routine, void* arg, PthreadEntryFunc start_routine, void* arg,
const char* name) { const char* name) {
LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate: name = {}", name);
int result = scePthreadCreate(thread, attr, start_routine, arg, name); int result = scePthreadCreate(thread, attr, start_routine, arg, name);
if (result != 0) { if (result != 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -1352,17 +1345,11 @@ int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadC
int PS4_SYSV_ABI posix_pthread_cond_signal(ScePthreadCond* cond) { int PS4_SYSV_ABI posix_pthread_cond_signal(ScePthreadCond* cond) {
int result = scePthreadCondSignal(cond); int result = scePthreadCondSignal(cond);
LOG_INFO(Kernel_Pthread,
"posix posix_pthread_cond_signal redirect to scePthreadCondSignal, result = {}",
result);
return result; return result;
} }
int PS4_SYSV_ABI posix_pthread_cond_destroy(ScePthreadCond* cond) { int PS4_SYSV_ABI posix_pthread_cond_destroy(ScePthreadCond* cond) {
int result = scePthreadCondDestroy(cond); int result = scePthreadCondDestroy(cond);
LOG_INFO(Kernel_Pthread,
"posix posix_pthread_cond_destroy redirect to scePthreadCondDestroy, result = {}",
result);
return result; return result;
} }

View File

@ -470,7 +470,7 @@ int PS4_SYSV_ABI scePadSetUserColor() {
} }
int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) { int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) {
LOG_ERROR(Lib_Pad, "(STUBBED) called"); LOG_DEBUG(Lib_Pad, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -40,7 +40,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
} }
} }
CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_) CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
: block_pool{block_pool_}, inst_list{inst_list_} { : block_pool{block_pool_}, inst_list{inst_list_} {
index_to_pc.resize(inst_list.size() + 1); index_to_pc.resize(inst_list.size() + 1);
EmitLabels(); EmitLabels();

View File

@ -8,10 +8,10 @@
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/intrusive/set.hpp> #include <boost/intrusive/set.hpp>
#include "common/object_pool.h"
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/condition.h" #include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Gcn { namespace Shader::Gcn {
@ -49,7 +49,7 @@ class CFG {
using Label = u32; using Label = u32;
public: public:
explicit CFG(ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list); explicit CFG(Common::ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
[[nodiscard]] std::string Dot() const; [[nodiscard]] std::string Dot() const;
@ -59,7 +59,7 @@ private:
void LinkBlocks(); void LinkBlocks();
public: public:
ObjectPool<Block>& block_pool; Common::ObjectPool<Block>& block_pool;
std::span<const GcnInst> inst_list; std::span<const GcnInst> inst_list;
std::vector<u32> index_to_pc; std::vector<u32> index_to_pc;
boost::container::small_vector<Label, 16> labels; boost::container::small_vector<Label, 16> labels;

View File

@ -1,10 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
namespace Shader::Gcn {
void Translate();
} // namespace Shader::Gcn

View File

@ -287,7 +287,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
*/ */
class GotoPass { class GotoPass {
public: public:
explicit GotoPass(CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { explicit GotoPass(CFG& cfg, Common::ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
std::vector gotos{BuildTree(cfg)}; std::vector gotos{BuildTree(cfg)};
const auto end{gotos.rend()}; const auto end{gotos.rend()};
for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
@ -563,7 +563,7 @@ private:
return parent_tree.insert(std::next(loop), *new_goto); return parent_tree.insert(std::next(loop), *new_goto);
} }
ObjectPool<Statement>& pool; Common::ObjectPool<Statement>& pool;
Statement root_stmt{FunctionTag{}}; Statement root_stmt{FunctionTag{}};
}; };
@ -597,8 +597,9 @@ private:
class TranslatePass { class TranslatePass {
public: public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, TranslatePass(Common::ObjectPool<IR::Inst>& inst_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt, Common::ObjectPool<IR::Block>& block_pool_,
Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_, IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Info& info_, const Profile& profile_) Info& info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
@ -808,9 +809,9 @@ private:
return block_pool.Create(inst_pool); return block_pool.Create(inst_pool);
} }
ObjectPool<Statement>& stmt_pool; Common::ObjectPool<Statement>& stmt_pool;
ObjectPool<IR::Inst>& inst_pool; Common::ObjectPool<IR::Inst>& inst_pool;
ObjectPool<IR::Block>& block_pool; Common::ObjectPool<IR::Block>& block_pool;
IR::AbstractSyntaxList& syntax_list; IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{.is_dummy = true}; const Block dummy_flow_block{.is_dummy = true};
std::span<const GcnInst> inst_list; std::span<const GcnInst> inst_list;
@ -819,9 +820,10 @@ private:
}; };
} // Anonymous namespace } // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
CFG& cfg, Info& info, const Profile& profile) { Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
ObjectPool<Statement> stmt_pool{64}; const Profile& profile) {
Common::ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool}; GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()}; Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list; IR::AbstractSyntaxList syntax_list;

View File

@ -7,7 +7,6 @@
#include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/value.h" #include "shader_recompiler/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader { namespace Shader {
struct Info; struct Info;
@ -16,8 +15,8 @@ struct Profile;
namespace Shader::Gcn { namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg, Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
Info& info, const Profile& profile); Info& info, const Profile& profile);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -9,7 +9,7 @@
namespace Shader::IR { namespace Shader::IR {
Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {} Block::Block(Common::ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
Block::~Block() = default; Block::~Block() = default;

View File

@ -9,10 +9,10 @@
#include <vector> #include <vector>
#include <boost/intrusive/list.hpp> #include <boost/intrusive/list.hpp>
#include "common/object_pool.h"
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h" #include "shader_recompiler/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::IR { namespace Shader::IR {
@ -25,7 +25,7 @@ public:
using reverse_iterator = InstructionList::reverse_iterator; using reverse_iterator = InstructionList::reverse_iterator;
using const_reverse_iterator = InstructionList::const_reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator;
explicit Block(ObjectPool<Inst>& inst_pool_); explicit Block(Common::ObjectPool<Inst>& inst_pool_);
~Block(); ~Block();
Block(const Block&) = delete; Block(const Block&) = delete;
@ -153,7 +153,7 @@ public:
private: private:
/// Memory pool for instruction list /// Memory pool for instruction list
ObjectPool<Inst>* inst_pool; Common::ObjectPool<Inst>* inst_pool;
/// List of instructions in this block /// List of instructions in this block
InstructionList instructions; InstructionList instructions;

View File

@ -433,6 +433,18 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
return; return;
} }
if (IsLoadBufferFormat(inst)) {
if (UseFP16(buffer.GetDataFmt(), buffer.GetNumberFmt())) {
info.uses_fp16 = true;
}
} else {
const u32 stride = buffer.GetStride();
if (stride < 4) {
LOG_WARNING(Render_Vulkan,
"non-formatting load_buffer_* is not implemented for stride {}", stride);
}
}
// Compute address of the buffer using the stride. // Compute address of the buffer using the stride.
// Todo: What if buffer is rebound with different stride? // Todo: What if buffer is rebound with different stride?
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value()); IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());

View File

@ -27,9 +27,9 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks; return blocks;
} }
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
std::span<const u32> token, const Info&& info, Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
const Profile& profile) { const Info&& info, const Profile& profile) {
// Ensure first instruction is expected. // Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF; constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -45,7 +45,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
} }
// Create control flow graph // Create control flow graph
ObjectPool<Gcn::Block> gcn_block_pool{64}; Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
Gcn::CFG cfg{gcn_block_pool, program.ins_list}; Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program. // Structurize control flow graph and create program.

View File

@ -3,16 +3,16 @@
#pragma once #pragma once
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader { namespace Shader {
struct Profile; struct Profile;
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Common::ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, const Info&& info, std::span<const u32> code, const Info&& info,
const Profile& profile); const Profile& profile);

View File

@ -13,7 +13,7 @@
#include "common/types.h" #include "common/types.h"
#include "video_core/buffer_cache/buffer.h" #include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h"
namespace AmdGpu { namespace AmdGpu {
struct Liverpool; struct Liverpool;
@ -37,6 +37,14 @@ public:
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
static constexpr u64 DEVICE_PAGESIZE = 4_KB; static constexpr u64 DEVICE_PAGESIZE = 4_KB;
// Compile-time parameters for the multi-level page table that maps a caching
// page index to the buffer registered on that page.
struct Traits {
// Value stored per page.
using Entry = BufferId;
// Width, in bits, of the address range covered by the table.
static constexpr size_t AddressSpaceBits = 39;
// Number of bits used to index the first level of the table.
static constexpr size_t FirstLevelBits = 14;
// Bits per page; same granularity as CACHING_PAGESIZE (1 << CACHING_PAGEBITS).
static constexpr size_t PageBits = CACHING_PAGEBITS;
};
using PageTable = MultiLevelPageTable<Traits>;
struct OverlapResult { struct OverlapResult {
boost::container::small_vector<BufferId, 16> ids; boost::container::small_vector<BufferId, 16> ids;
VAddr begin; VAddr begin;
@ -115,7 +123,7 @@ private:
std::recursive_mutex mutex; std::recursive_mutex mutex;
Common::SlotVector<Buffer> slot_buffers; Common::SlotVector<Buffer> slot_buffers;
MemoryTracker memory_tracker; MemoryTracker memory_tracker;
std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; PageTable page_table;
}; };
} // namespace VideoCore } // namespace VideoCore

View File

@ -0,0 +1,65 @@
// SPDX-FileCopyrightText: 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstddef>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/object_pool.h"
#include "common/types.h"
namespace VideoCore {
/// Sparse two-level table that maps a page index to a `Traits::Entry`.
///
/// The first level is a vector of `2^FirstLevelBits` pointers created up front. Each pointer
/// refers to a second-level array ("L1 page") of `2^SecondLevelBits` entries that is allocated
/// from `page_alloc` the first time a page inside it is accessed through the mutable
/// `operator[]`. `SecondLevelBits` is `AddressSpaceBits - FirstLevelBits - PageBits`.
///
/// `Traits` must provide `Entry`, `AddressSpaceBits`, `FirstLevelBits` and `PageBits`.
template <class Traits>
class MultiLevelPageTable final {
    using Entry = typename Traits::Entry;

    static constexpr size_t AddressSpaceBits = Traits::AddressSpaceBits;
    static constexpr size_t FirstLevelBits = Traits::FirstLevelBits;
    static constexpr size_t PageBits = Traits::PageBits;
    static_assert(FirstLevelBits + PageBits <= AddressSpaceBits,
                  "FirstLevelBits + PageBits must not exceed AddressSpaceBits");
    static constexpr size_t FirstLevelShift = AddressSpaceBits - FirstLevelBits;
    static constexpr size_t SecondLevelBits = FirstLevelShift - PageBits;
    static constexpr size_t NumEntriesPerL1Page = 1ULL << SecondLevelBits;

    using L1Page = std::array<Entry, NumEntriesPerL1Page>;

public:
    // Parentheses select the (count, value) constructor explicitly instead of relying on
    // list-initialization falling back to it.
    explicit MultiLevelPageTable() : first_level_map(1ULL << FirstLevelBits, nullptr) {}

    ~MultiLevelPageTable() noexcept = default;

    /// Returns a pointer to the entry of `page`, or nullptr when the second-level page that
    /// would hold it has not been allocated (or `page` lies outside the table). Never allocates.
    [[nodiscard]] Entry* find(size_t page) {
        const size_t l1_page = page >> SecondLevelBits;
        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
        if (l1_page >= first_level_map.size() || !first_level_map[l1_page]) {
            return nullptr;
        }
        return &(*first_level_map[l1_page])[l2_page];
    }

    /// Read-only lookup. A const table cannot allocate, so a page whose second-level array does
    /// not exist (or that lies outside the table) yields a shared default-constructed entry.
    /// NOTE(review): this assumes a freshly created L1 page holds default-constructed entries;
    /// confirm against Common::ObjectPool::Create.
    [[nodiscard]] const Entry& operator[](size_t page) const {
        static const Entry empty_entry{};
        const size_t l1_page = page >> SecondLevelBits;
        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
        if (l1_page >= first_level_map.size() || !first_level_map[l1_page]) {
            return empty_entry;
        }
        return (*first_level_map[l1_page])[l2_page];
    }

    /// Mutable lookup. Allocates the second-level page on first access, so the returned
    /// reference always refers to storage owned by this table. `page` must be below
    /// `2^(AddressSpaceBits - PageBits)`; larger values index past the first level.
    [[nodiscard]] Entry& operator[](size_t page) {
        const size_t l1_page = page >> SecondLevelBits;
        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
        if (!first_level_map[l1_page]) {
            first_level_map[l1_page] = page_alloc.Create();
        }
        return (*first_level_map[l1_page])[l2_page];
    }

private:
    /// One slot per first-level index; nullptr until the matching L1 page is created.
    std::vector<L1Page*> first_level_map{};
    /// Backing storage for the L1 pages referenced by `first_level_map`.
    Common::ObjectPool<L1Page> page_alloc;
};
} // namespace VideoCore

View File

@ -22,7 +22,44 @@ namespace VideoCore {
constexpr size_t PAGESIZE = 4_KB; constexpr size_t PAGESIZE = 4_KB;
constexpr size_t PAGEBITS = 12; constexpr size_t PAGEBITS = 12;
#ifdef SHADPS4_USERFAULTFD #ifdef _WIN64
/// Windows backend of the page manager: page protection is changed with VirtualProtect and
/// write faults are intercepted through a vectored exception handler.
struct PageManager::Impl {
    /// Stores the rasterizer for the fault handler and registers the handler.
    Impl(Vulkan::Rasterizer* rasterizer_) {
        rasterizer = rasterizer_;

        // A first argument of 0 asks for the handler to be called last among vectored handlers.
        veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
        ASSERT_MSG(veh_handle, "Failed to register an exception handler");
    }

    /// No bookkeeping is needed on map for this backend.
    void OnMap(VAddr address, size_t size) {}

    /// No bookkeeping is needed on unmap for this backend.
    void OnUnmap(VAddr address, size_t size) {}

    /// Makes [address, address + size) readable, and writable only when `allow_write` is set.
    void Protect(VAddr address, size_t size, bool allow_write) {
        // VirtualProtect takes Win32 PAGE_* constants, not the POSIX PROT_* flags.
        const DWORD prot = allow_write ? PAGE_READWRITE : PAGE_READONLY;
        DWORD old_prot{};
        const BOOL result = VirtualProtect(std::bit_cast<LPVOID>(address), size, prot, &old_prot);
        ASSERT_MSG(result != 0, "Region protection failed");
    }

    /// Forwards write access violations to the rasterizer and resumes execution; every other
    /// exception is passed on to the next handler.
    static LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
        const u32 ec = pExp->ExceptionRecord->ExceptionCode;
        if (ec == EXCEPTION_ACCESS_VIOLATION) {
            // For access violations, info[0] is the access type and info[1] the faulting address.
            const auto info = pExp->ExceptionRecord->ExceptionInformation;
            if (info[0] == 1) { // Write violation
                rasterizer->InvalidateMemory(info[1], sizeof(u64));
                return EXCEPTION_CONTINUE_EXECUTION;
            } /* else {
                UNREACHABLE();
            }*/
        }
        return EXCEPTION_CONTINUE_SEARCH; // pass further
    }

    /// Static because the exception handler is a free callback without a user-data argument.
    inline static Vulkan::Rasterizer* rasterizer;
    /// Handle returned by AddVectoredExceptionHandler.
    void* veh_handle{};
};
#elif ENABLE_USERFAULTFD
struct PageManager::Impl { struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@ -121,9 +158,15 @@ struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) { Impl(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_; rasterizer = rasterizer_;
#ifdef __APPLE__
// Read-only memory write results in SIGBUS on Apple.
static constexpr int SignalType = SIGBUS;
#else
static constexpr int SignalType = SIGSEGV;
#endif
sigset_t signal_mask; sigset_t signal_mask;
sigemptyset(&signal_mask); sigemptyset(&signal_mask);
sigaddset(&signal_mask, SIGSEGV); sigaddset(&signal_mask, SignalType);
using HandlerType = decltype(sigaction::sa_sigaction); using HandlerType = decltype(sigaction::sa_sigaction);
@ -131,7 +174,7 @@ struct PageManager::Impl {
guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK; guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
guest_access_fault.sa_sigaction = &GuestFaultSignalHandler; guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
guest_access_fault.sa_mask = signal_mask; guest_access_fault.sa_mask = signal_mask;
sigaction(SIGSEGV, &guest_access_fault, nullptr); sigaction(SignalType, &guest_access_fault, nullptr);
} }
void OnMap(VAddr address, size_t size) {} void OnMap(VAddr address, size_t size) {}
@ -148,7 +191,7 @@ struct PageManager::Impl {
const VAddr address = reinterpret_cast<VAddr>(info->si_addr); const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR]; const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
if (err & 0x2) { if (err & 0x2) {
rasterizer->InvalidateMemory(address, PAGESIZE); rasterizer->InvalidateMemory(address, sizeof(u64));
} else { } else {
// Read not supported! // Read not supported!
UNREACHABLE(); UNREACHABLE();

View File

@ -346,19 +346,26 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
} }
for (const auto& buffer : stage.buffers) { for (const auto& buffer : stage.buffers) {
const auto vsharp = buffer.GetVsharp(stage); const auto vsharp = buffer.GetVsharp(stage);
const VAddr address = vsharp.base_address; if (vsharp) {
const u32 size = vsharp.GetSize(); const VAddr address = vsharp.base_address;
const u32 alignment = if (texture_cache.IsMeta(address)) {
buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
const auto [vk_buffer, offset] = }
buffer_cache.ObtainBuffer(address, size, buffer.is_written); const u32 size = vsharp.GetSize();
const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment()
const u32 adjust = offset - offset_aligned; : instance.UniformMinAlignment();
if (adjust != 0) { const auto [vk_buffer, offset] =
ASSERT(adjust % 4 == 0); buffer_cache.ObtainBuffer(address, size, buffer.is_written);
push_data.AddOffset(binding, adjust); const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % 4 == 0);
push_data.AddOffset(binding, adjust);
}
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
} else {
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
} }
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
set_writes.push_back({ set_writes.push_back({
.dstSet = VK_NULL_HANDLE, .dstSet = VK_NULL_HANDLE,
.dstBinding = binding++, .dstBinding = binding++,
@ -368,10 +375,6 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
: vk::DescriptorType::eUniformBuffer, : vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(), .pBufferInfo = &buffer_infos.back(),
}); });
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
}
} }
boost::container::static_vector<AmdGpu::Image, 16> tsharps; boost::container::static_vector<AmdGpu::Image, 16> tsharps;

View File

@ -204,7 +204,8 @@ bool Instance::CreateDevice() {
// The next two extensions are required to be available together in order to support write masks // The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions. // with extensions.
@ -303,12 +304,19 @@ bool Instance::CreateDevice() {
.workgroupMemoryExplicitLayoutScalarBlockLayout = true, .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
.workgroupMemoryExplicitLayout8BitAccess = true, .workgroupMemoryExplicitLayout8BitAccess = true,
.workgroupMemoryExplicitLayout16BitAccess = true, .workgroupMemoryExplicitLayout16BitAccess = true,
}}; },
vk::PhysicalDeviceRobustness2FeaturesEXT{
.nullDescriptor = true,
},
};
if (!color_write_en) { if (!color_write_en) {
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
} }
if (!robustness) {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}
try { try {
device = physical_device.createDeviceUnique(device_chain.get()); device = physical_device.createDeviceUnique(device_chain.get());

View File

@ -5,7 +5,6 @@
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h" #include "shader_recompiler/profile.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@ -51,8 +50,8 @@ private:
Shader::Profile profile{}; Shader::Profile profile{};
GraphicsPipelineKey graphics_key{}; GraphicsPipelineKey graphics_key{};
u64 compute_key{}; u64 compute_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool; Common::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool; Common::ObjectPool<Shader::IR::Block> block_pool;
}; };
} // namespace Vulkan } // namespace Vulkan

View File

@ -23,6 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
liverpool->BindRasterizer(this); liverpool->BindRasterizer(this);
} }
memory->SetRasterizer(this); memory->SetRasterizer(this);
wfi_event = instance.GetDevice().createEventUnique({});
} }
Rasterizer::~Rasterizer() = default; Rasterizer::~Rasterizer() = default;

View File

@ -60,6 +60,7 @@ private:
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory; Core::MemoryManager* memory;
PipelineCache pipeline_cache; PipelineCache pipeline_cache;
vk::UniqueEvent wfi_event;
}; };
} // namespace Vulkan } // namespace Vulkan

View File

@ -300,11 +300,11 @@ void TextureCache::UnregisterImage(ImageId image_id) {
image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page); const auto page_it = page_table.find(page);
if (page_it == page_table.end()) { if (page_it == nullptr) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift); UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
return; return;
} }
auto& image_ids = page_it.value(); auto& image_ids = *page_it;
const auto vector_it = std::ranges::find(image_ids, image_id); const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) { if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift); ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);

View File

@ -9,6 +9,7 @@
#include "common/slot_vector.h" #include "common/slot_vector.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/multi_level_page_table.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/image.h" #include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/image_view.h"
@ -25,12 +26,13 @@ class BufferCache;
class PageManager; class PageManager;
class TextureCache { class TextureCache {
// This is the page shift for adding images into the hash map. It isn't related to struct Traits {
// the page size of the guest or the host and is chosen for convenience. A number too using Entry = boost::container::small_vector<ImageId, 16>;
// small will increase the number of hash map lookups per image, while too large will static constexpr size_t AddressSpaceBits = 39;
// increase the number of images per page. static constexpr size_t FirstLevelBits = 9;
static constexpr u64 PageBits = 22; static constexpr size_t PageBits = 22;
static constexpr u64 PageMask = (1ULL << PageBits) - 1; };
using PageTable = MultiLevelPageTable<Traits>;
public: public:
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
@ -97,8 +99,8 @@ private:
template <typename Func> template <typename Func>
static void ForEachPage(PAddr addr, size_t size, Func&& func) { static void ForEachPage(PAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PageBits; const u64 page_end = (addr + size - 1) >> Traits::PageBits;
for (u64 page = addr >> PageBits; page <= page_end; ++page) { for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) { if constexpr (RETURNS_BOOL) {
if (func(page)) { if (func(page)) {
break; break;
@ -116,14 +118,14 @@ private:
boost::container::small_vector<ImageId, 32> images; boost::container::small_vector<ImageId, 32> images;
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page); const auto it = page_table.find(page);
if (it == page_table.end()) { if (it == nullptr) {
if constexpr (BOOL_BREAK) { if constexpr (BOOL_BREAK) {
return false; return false;
} else { } else {
return; return;
} }
} }
for (const ImageId image_id : it->second) { for (const ImageId image_id : *it) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
if (image.flags & ImageFlagBits::Picked) { if (image.flags & ImageFlagBits::Picked) {
continue; continue;
@ -175,7 +177,7 @@ private:
Common::SlotVector<Image> slot_images; Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageView> slot_image_views; Common::SlotVector<ImageView> slot_image_views;
tsl::robin_map<u64, Sampler> samplers; tsl::robin_map<u64, Sampler> samplers;
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table; PageTable page_table;
boost::icl::interval_map<VAddr, s32> cached_pages; boost::icl::interval_map<VAddr, s32> cached_pages;
std::mutex mutex; std::mutex mutex;

View File

@ -202,12 +202,20 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eBc5UnormBlock: case vk::Format::eBc5UnormBlock:
case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock: case vk::Format::eBc7UnormBlock:
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eR32G32B32A32Sfloat: case vk::Format::eR32G32B32A32Sfloat:
return vk::Format::eR32G32B32A32Uint; return vk::Format::eR32G32B32A32Uint;
default: default:
break; break;
} }
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
// Log missing formats only once to avoid spamming the log.
static constexpr size_t MaxFormatIndex = 256;
static std::array<bool, MaxFormatIndex> logged_formats{};
if (const u32 index = u32(format); !logged_formats[index]) {
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
logged_formats[index] = true;
}
return format; return format;
} }