video_core: Use multi level page table for caches
commit b387ce9bbd (parent efcabbfd2a)
@@ -283,6 +283,7 @@ set(COMMON src/common/logging/backend.cpp
            src/common/native_clock.h
            src/common/path_util.cpp
            src/common/path_util.h
+           src/common/object_pool.h
            src/common/polyfill_thread.h
            src/common/rdtsc.cpp
            src/common/rdtsc.h
@@ -368,7 +369,6 @@ set(CORE src/core/aerolib/stubs.cpp
 )

 set(SHADER_RECOMPILER src/shader_recompiler/exception.h
-                      src/shader_recompiler/object_pool.h
                       src/shader_recompiler/profile.h
                       src/shader_recompiler/recompiler.cpp
                       src/shader_recompiler/recompiler.h
@@ -457,7 +457,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
                src/video_core/buffer_cache/buffer_cache.cpp
                src/video_core/buffer_cache/buffer_cache.h
                src/video_core/buffer_cache/memory_tracker_base.h
-               src/video_core/buffer_cache/range_set.cpp
                src/video_core/buffer_cache/range_set.h
                src/video_core/buffer_cache/word_manager.h
                src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -507,6 +506,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
                src/video_core/texture_cache/types.h
                src/video_core/page_manager.cpp
                src/video_core/page_manager.h
+               src/video_core/multi_level_page_table.h
                src/video_core/renderdoc.cpp
                src/video_core/renderdoc.h
 )
@@ -8,7 +8,7 @@
 #include <utility>
 #include <vector>

-namespace Shader {
+namespace Common {

 template <typename T>
     requires std::is_destructible_v<T>
@@ -104,4 +104,4 @@ private:
     size_t new_chunk_size{};
 };

-} // namespace Shader
+} // namespace Common
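The pool being moved into src/common here is the arena the recompiler passes around as Common::ObjectPool<T> in the hunks below (see the Create() calls and the ObjectPool<Statement> stmt_pool{64} construction). As a rough illustration of the idea, and not the shadPS4 implementation itself, a minimal chunked pool looks like the sketch below: objects are constructed in place inside fixed-size chunks, pointers stay valid until the pool dies, and everything is released at once.

// Minimal sketch of a chunked object pool (illustration only, not the shadPS4 class).
#include <cstddef>
#include <type_traits>
#include <utility>
#include <vector>

template <typename T>
    requires std::is_destructible_v<T>
class SketchObjectPool {
public:
    explicit SketchObjectPool(std::size_t chunk_size_ = 64) : chunk_size{chunk_size_} {}

    // Construct a T inside the pool; the pointer stays valid until the pool is destroyed.
    template <typename... Args>
    T* Create(Args&&... args) {
        if (chunks.empty() || chunks.back().size() == chunk_size) {
            chunks.emplace_back().reserve(chunk_size); // start a new fixed-capacity chunk
        }
        return &chunks.back().emplace_back(std::forward<Args>(args)...);
    }

private:
    std::size_t chunk_size;
    std::vector<std::vector<T>> chunks; // each chunk is reserved up front, so elements never move
};

Usage then matches the call sites in this commit, e.g. SketchObjectPool<int> pool{64}; int* v = pool.Create(7);.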
@@ -465,7 +465,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) {

     int result = pthread_mutex_destroy(&(*mutex)->pth_mutex);

-    LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
+    LOG_DEBUG(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);

     delete *mutex;
     *mutex = nullptr;
@@ -725,7 +725,7 @@ int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
     }
     int result = pthread_cond_destroy(&(*cond)->cond);

-    LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
+    LOG_DEBUG(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);

     delete *cond;
     *cond = nullptr;
@@ -811,8 +811,6 @@ int PS4_SYSV_ABI posix_pthread_cond_timedwait(ScePthreadCond* cond, ScePthreadMu
 }

 int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) {
-    LOG_INFO(Kernel_Pthread,
-             "posix posix_pthread_cond_broadcast redirect to scePthreadCondBroadcast");
     int result = scePthreadCondBroadcast(cond);
     if (result != 0) {
         int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@@ -824,7 +822,6 @@ int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) {
 }

 int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
-    // LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
     int result = scePthreadMutexattrInit(attr);
     if (result < 0) {
         int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@@ -836,7 +833,6 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
 }

 int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) {
-    // LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit");
     int result = scePthreadMutexattrSettype(attr, type);
     if (result < 0) {
         int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@@ -861,7 +857,6 @@ int PS4_SYSV_ABI posix_pthread_once(pthread_once_t* once_control, void (*init_ro

 int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) {
     int result = scePthreadMutexattrSetprotocol(attr, protocol);
-    LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", result);
     if (result < 0) {
         UNREACHABLE();
     }
@@ -1304,8 +1299,6 @@ int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int det
 int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthreadAttr* attr,
                                               PthreadEntryFunc start_routine, void* arg,
                                               const char* name) {
-    LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate: name = {}", name);
-
     int result = scePthreadCreate(thread, attr, start_routine, arg, name);
     if (result != 0) {
         int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@@ -1352,17 +1345,11 @@ int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadC

 int PS4_SYSV_ABI posix_pthread_cond_signal(ScePthreadCond* cond) {
     int result = scePthreadCondSignal(cond);
-    LOG_INFO(Kernel_Pthread,
-             "posix posix_pthread_cond_signal redirect to scePthreadCondSignal, result = {}",
-             result);
     return result;
 }

 int PS4_SYSV_ABI posix_pthread_cond_destroy(ScePthreadCond* cond) {
     int result = scePthreadCondDestroy(cond);
-    LOG_INFO(Kernel_Pthread,
-             "posix posix_pthread_cond_destroy redirect to scePthreadCondDestroy, result = {}",
-             result);
     return result;
 }

@@ -470,7 +470,7 @@ int PS4_SYSV_ABI scePadSetUserColor() {
 }

 int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) {
-    LOG_ERROR(Lib_Pad, "(STUBBED) called");
+    LOG_DEBUG(Lib_Pad, "(STUBBED) called");
     return ORBIS_OK;
 }
@@ -40,7 +40,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
     }
 }

-CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
+CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
     : block_pool{block_pool_}, inst_list{inst_list_} {
     index_to_pc.resize(inst_list.size() + 1);
     EmitLabels();

@@ -8,10 +8,10 @@
 #include <boost/container/small_vector.hpp>
 #include <boost/intrusive/set.hpp>

+#include "common/object_pool.h"
 #include "common/types.h"
 #include "shader_recompiler/frontend/instruction.h"
 #include "shader_recompiler/ir/condition.h"
-#include "shader_recompiler/object_pool.h"

 namespace Shader::Gcn {

@@ -49,7 +49,7 @@ class CFG {
     using Label = u32;

 public:
-    explicit CFG(ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
+    explicit CFG(Common::ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);

     [[nodiscard]] std::string Dot() const;

@@ -59,7 +59,7 @@ private:
     void LinkBlocks();

 public:
-    ObjectPool<Block>& block_pool;
+    Common::ObjectPool<Block>& block_pool;
     std::span<const GcnInst> inst_list;
     std::vector<u32> index_to_pc;
     boost::container::small_vector<Label, 16> labels;
@@ -1,10 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-namespace Shader::Gcn {
-
-void Translate();
-
-} // namespace Shader::Gcn
@@ -287,7 +287,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
 */
 class GotoPass {
 public:
-    explicit GotoPass(CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
+    explicit GotoPass(CFG& cfg, Common::ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
         std::vector gotos{BuildTree(cfg)};
         const auto end{gotos.rend()};
         for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
@@ -563,7 +563,7 @@ private:
         return parent_tree.insert(std::next(loop), *new_goto);
     }

-    ObjectPool<Statement>& pool;
+    Common::ObjectPool<Statement>& pool;
     Statement root_stmt{FunctionTag{}};
 };

@@ -597,8 +597,9 @@

 class TranslatePass {
 public:
-    TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
-                  ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
+    TranslatePass(Common::ObjectPool<IR::Inst>& inst_pool_,
+                  Common::ObjectPool<IR::Block>& block_pool_,
+                  Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
                   IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
                   Info& info_, const Profile& profile_)
         : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
@@ -808,9 +809,9 @@ private:
         return block_pool.Create(inst_pool);
     }

-    ObjectPool<Statement>& stmt_pool;
-    ObjectPool<IR::Inst>& inst_pool;
-    ObjectPool<IR::Block>& block_pool;
+    Common::ObjectPool<Statement>& stmt_pool;
+    Common::ObjectPool<IR::Inst>& inst_pool;
+    Common::ObjectPool<IR::Block>& block_pool;
     IR::AbstractSyntaxList& syntax_list;
     const Block dummy_flow_block{.is_dummy = true};
     std::span<const GcnInst> inst_list;
@@ -819,9 +820,10 @@ private:
 };
 } // Anonymous namespace

-IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                                CFG& cfg, Info& info, const Profile& profile) {
-    ObjectPool<Statement> stmt_pool{64};
+IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
+                                Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
+                                const Profile& profile) {
+    Common::ObjectPool<Statement> stmt_pool{64};
     GotoPass goto_pass{cfg, stmt_pool};
     Statement& root{goto_pass.RootStatement()};
     IR::AbstractSyntaxList syntax_list;

@@ -7,7 +7,6 @@
 #include "shader_recompiler/ir/abstract_syntax_list.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/value.h"
-#include "shader_recompiler/object_pool.h"

 namespace Shader {
 struct Info;
@@ -16,8 +15,8 @@ struct Profile;

 namespace Shader::Gcn {

-[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
-                                              ObjectPool<IR::Block>& block_pool, CFG& cfg,
+[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
+                                              Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
                                               Info& info, const Profile& profile);

 } // namespace Shader::Gcn
@@ -9,7 +9,7 @@

 namespace Shader::IR {

-Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
+Block::Block(Common::ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}

 Block::~Block() = default;

@@ -9,10 +9,10 @@
 #include <vector>
 #include <boost/intrusive/list.hpp>

+#include "common/object_pool.h"
 #include "common/types.h"
 #include "shader_recompiler/ir/reg.h"
 #include "shader_recompiler/ir/value.h"
-#include "shader_recompiler/object_pool.h"

 namespace Shader::IR {

@@ -25,7 +25,7 @@ public:
     using reverse_iterator = InstructionList::reverse_iterator;
     using const_reverse_iterator = InstructionList::const_reverse_iterator;

-    explicit Block(ObjectPool<Inst>& inst_pool_);
+    explicit Block(Common::ObjectPool<Inst>& inst_pool_);
     ~Block();

     Block(const Block&) = delete;
@@ -153,7 +153,7 @@ public:

 private:
     /// Memory pool for instruction list
-    ObjectPool<Inst>* inst_pool;
+    Common::ObjectPool<Inst>* inst_pool;

     /// List of instructions in this block
     InstructionList instructions;
@@ -433,6 +433,18 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
         return;
     }

+    if (IsLoadBufferFormat(inst)) {
+        if (UseFP16(buffer.GetDataFmt(), buffer.GetNumberFmt())) {
+            info.uses_fp16 = true;
+        }
+    } else {
+        const u32 stride = buffer.GetStride();
+        if (stride < 4) {
+            LOG_WARNING(Render_Vulkan,
+                        "non-formatting load_buffer_* is not implemented for stride {}", stride);
+        }
+    }
+
     // Compute address of the buffer using the stride.
     // Todo: What if buffer is rebound with different stride?
     IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
@@ -27,9 +27,9 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
     return blocks;
 }

-IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                             std::span<const u32> token, const Info&& info,
-                             const Profile& profile) {
+IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
+                             Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
+                             const Info&& info, const Profile& profile) {
     // Ensure first instruction is expected.
     constexpr u32 token_mov_vcchi = 0xBEEB03FF;
     ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@@ -45,7 +45,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     }

     // Create control flow graph
-    ObjectPool<Gcn::Block> gcn_block_pool{64};
+    Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
     Gcn::CFG cfg{gcn_block_pool, program.ins_list};

     // Structurize control flow graph and create program.

@@ -3,16 +3,16 @@

 #pragma once

+#include "common/object_pool.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/program.h"
-#include "shader_recompiler/object_pool.h"

 namespace Shader {

 struct Profile;

-[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
-                                           ObjectPool<IR::Block>& block_pool,
+[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
+                                           Common::ObjectPool<IR::Block>& block_pool,
                                            std::span<const u32> code, const Info&& info,
                                            const Profile& profile);

@@ -13,7 +13,7 @@
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
 #include "video_core/buffer_cache/memory_tracker_base.h"
-#include "video_core/buffer_cache/range_set.h"
+#include "video_core/multi_level_page_table.h"

 namespace AmdGpu {
 struct Liverpool;
@@ -37,6 +37,14 @@ public:
     static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
     static constexpr u64 DEVICE_PAGESIZE = 4_KB;

+    struct Traits {
+        using Entry = BufferId;
+        static constexpr size_t AddressSpaceBits = 39;
+        static constexpr size_t FirstLevelBits = 14;
+        static constexpr size_t PageBits = CACHING_PAGEBITS;
+    };
+    using PageTable = MultiLevelPageTable<Traits>;
+
     struct OverlapResult {
         boost::container::small_vector<BufferId, 16> ids;
         VAddr begin;
@@ -115,7 +123,7 @@ private:
     std::recursive_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
     MemoryTracker memory_tracker;
-    std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
+    PageTable page_table;
 };

 } // namespace VideoCore
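For context on the member swap in the last hunk: the old std::array<BufferId, (1ULL << 39) >> CACHING_PAGEBITS> reserves a slot for every cacheable page of the 39-bit address space up front, while MultiLevelPageTable only materializes second-level blocks that are actually touched. A standalone back-of-the-envelope sketch of the split, using the Traits values above; CACHING_PAGEBITS is not visible in this diff, so 12 is assumed here purely for illustration:

// Rough size comparison between the flat page table and the two-level layout (illustrative).
#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::uint64_t AddressSpaceBits = 39; // BufferCache::Traits::AddressSpaceBits
    constexpr std::uint64_t FirstLevelBits = 14;   // BufferCache::Traits::FirstLevelBits
    constexpr std::uint64_t PageBits = 12;         // assumed stand-in for CACHING_PAGEBITS

    constexpr std::uint64_t flat_entries = 1ULL << (AddressSpaceBits - PageBits);
    constexpr std::uint64_t first_level_pointers = 1ULL << FirstLevelBits;
    constexpr std::uint64_t entries_per_block = 1ULL << (AddressSpaceBits - FirstLevelBits - PageBits);

    std::printf("flat table entries:    %llu\n", static_cast<unsigned long long>(flat_entries));
    std::printf("first-level pointers:  %llu\n", static_cast<unsigned long long>(first_level_pointers));
    std::printf("entries per 2nd level: %llu\n", static_cast<unsigned long long>(entries_per_block));
    // Only the second-level blocks covering cached ranges get allocated, so a sparse
    // guest address space no longer pays for all 2^(39 - PageBits) slots at once.
    return 0;
}

Since both containers expose operator[], lookups of the form page_table[device_addr >> CACHING_PAGEBITS] presumably keep the same shape at the call sites.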
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/object_pool.h"
+#include "common/types.h"
+
+namespace VideoCore {
+
+template <class Traits>
+class MultiLevelPageTable final {
+    using Entry = typename Traits::Entry;
+
+    static constexpr size_t AddressSpaceBits = Traits::AddressSpaceBits;
+    static constexpr size_t FirstLevelBits = Traits::FirstLevelBits;
+    static constexpr size_t PageBits = Traits::PageBits;
+    static constexpr size_t FirstLevelShift = AddressSpaceBits - FirstLevelBits;
+    static constexpr size_t SecondLevelBits = FirstLevelShift - PageBits;
+    static constexpr size_t NumEntriesPerL1Page = 1ULL << SecondLevelBits;
+
+    using L1Page = std::array<Entry, NumEntriesPerL1Page>;
+
+public:
+    explicit MultiLevelPageTable() : first_level_map{1ULL << FirstLevelBits, nullptr} {}
+
+    ~MultiLevelPageTable() noexcept = default;
+
+    [[nodiscard]] Entry* find(size_t page) {
+        const size_t l1_page = page >> SecondLevelBits;
+        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
+        if (!first_level_map[l1_page]) {
+            return nullptr;
+        }
+        return &(*first_level_map[l1_page])[l2_page];
+    }
+
+    [[nodiscard]] const Entry& operator[](size_t page) const {
+        const size_t l1_page = page >> SecondLevelBits;
+        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
+        if (!first_level_map[l1_page]) {
+            first_level_map[l1_page] = page_alloc.Create();
+        }
+        return (*first_level_map[l1_page])[l2_page];
+    }
+
+    [[nodiscard]] Entry& operator[](size_t page) {
+        const size_t l1_page = page >> SecondLevelBits;
+        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
+        if (!first_level_map[l1_page]) {
+            first_level_map[l1_page] = page_alloc.Create();
+        }
+        return (*first_level_map[l1_page])[l2_page];
+    }
+
+private:
+    std::vector<L1Page*> first_level_map{};
+    Common::ObjectPool<L1Page> page_alloc;
+};
+
+} // namespace VideoCore
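To make the behaviour of the new header concrete, here is a compressed standalone mirror of it with a fixed entry type and a small main. This is a teaching sketch rather than the repository class: the real table takes its dimensions from a Traits type and allocates second-level pages out of Common::ObjectPool instead of unique_ptr.

// Standalone sketch mirroring MultiLevelPageTable: two levels, second level allocated lazily.
#include <array>
#include <cstdint>
#include <cstdio>
#include <memory>
#include <vector>

class TwoLevelTable {
    static constexpr std::size_t AddressSpaceBits = 39;
    static constexpr std::size_t FirstLevelBits = 9;
    static constexpr std::size_t PageBits = 22;
    static constexpr std::size_t SecondLevelBits = AddressSpaceBits - FirstLevelBits - PageBits;
    static constexpr std::size_t EntriesPerBlock = std::size_t{1} << SecondLevelBits;

    using Entry = std::uint32_t;
    using Block = std::array<Entry, EntriesPerBlock>;

public:
    TwoLevelTable() : first_level_map(std::size_t{1} << FirstLevelBits) {}

    // Non-allocating lookup: nullptr means nothing was ever stored in this region.
    Entry* find(std::size_t page) {
        const std::size_t l1 = page >> SecondLevelBits;
        const std::size_t l2 = page & (EntriesPerBlock - 1);
        return first_level_map[l1] ? &(*first_level_map[l1])[l2] : nullptr;
    }

    // Indexing allocates the second-level block on first touch, like the real operator[].
    Entry& operator[](std::size_t page) {
        const std::size_t l1 = page >> SecondLevelBits;
        const std::size_t l2 = page & (EntriesPerBlock - 1);
        if (!first_level_map[l1]) {
            first_level_map[l1] = std::make_unique<Block>(); // entries start zero-initialized
        }
        return (*first_level_map[l1])[l2];
    }

private:
    std::vector<std::unique_ptr<Block>> first_level_map;
};

int main() {
    TwoLevelTable table;
    const std::size_t page = 0x1'2345'6789ULL >> 22; // an address below 2^39, PageBits = 22
    std::printf("before: %s\n", table.find(page) ? "allocated" : "untouched");
    table[page] = 42; // first write allocates exactly one second-level block
    std::printf("after:  entry = %u\n", *table.find(page));
    return 0;
}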
@@ -22,7 +22,44 @@ namespace VideoCore {
 constexpr size_t PAGESIZE = 4_KB;
 constexpr size_t PAGEBITS = 12;

-#ifdef SHADPS4_USERFAULTFD
+#ifdef _WIN64
+struct PageManager::Impl {
+    Impl(Vulkan::Rasterizer* rasterizer_) {
+        rasterizer = rasterizer_;
+
+        veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
+        ASSERT_MSG(veh_handle, "Failed to register an exception handler");
+    }
+
+    void OnMap(VAddr address, size_t size) {}
+
+    void OnUnmap(VAddr address, size_t size) {}
+
+    void Protect(VAddr address, size_t size, bool allow_write) {
+        DWORD prot = PROT_READ | (allow_write ? PROT_WRITE : 0);
+        DWORD old_prot{};
+        BOOL result = VirtualProtect(std::bit_cast<LPVOID>(address), len, prot, &old_prot);
+        ASSERT_MSG(result != 0, "Region protection failed");
+    }
+
+    static LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
+        const u32 ec = pExp->ExceptionRecord->ExceptionCode;
+        if (ec == EXCEPTION_ACCESS_VIOLATION) {
+            const auto info = pExp->ExceptionRecord->ExceptionInformation;
+            if (info[0] == 1) { // Write violation
+                rasterizer->InvalidateMemory(info[1], sizeof(u64));
+                return EXCEPTION_CONTINUE_EXECUTION;
+            } /* else {
+                UNREACHABLE();
+            }*/
+        }
+        return EXCEPTION_CONTINUE_SEARCH; // pass further
+    }
+
+    inline static Vulkan::Rasterizer* rasterizer;
+    void* veh_handle{};
+};
+#elif ENABLE_USERFAULTFD
 struct PageManager::Impl {
     Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
         uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@@ -121,9 +158,15 @@ struct PageManager::Impl {
     Impl(Vulkan::Rasterizer* rasterizer_) {
         rasterizer = rasterizer_;

+#ifdef __APPLE__
+        // Read-only memory write results in SIGBUS on Apple.
+        static constexpr int SignalType = SIGBUS;
+#else
+        static constexpr int SignalType = SIGSEGV;
+#endif
         sigset_t signal_mask;
         sigemptyset(&signal_mask);
-        sigaddset(&signal_mask, SIGSEGV);
+        sigaddset(&signal_mask, SignalType);

         using HandlerType = decltype(sigaction::sa_sigaction);

@@ -131,7 +174,7 @@ struct PageManager::Impl {
         guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
         guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
         guest_access_fault.sa_mask = signal_mask;
-        sigaction(SIGSEGV, &guest_access_fault, nullptr);
+        sigaction(SignalType, &guest_access_fault, nullptr);
     }

     void OnMap(VAddr address, size_t size) {}

@@ -148,7 +191,7 @@ struct PageManager::Impl {
         const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
         const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
         if (err & 0x2) {
-            rasterizer->InvalidateMemory(address, PAGESIZE);
+            rasterizer->InvalidateMemory(address, sizeof(u64));
         } else {
             // Read not supported!
             UNREACHABLE();
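All three Impl variants in this file implement the same loop: keep tracked guest pages mapped read-only, catch the write fault, report the touched address to the rasterizer, then let the write proceed. A minimal standalone POSIX sketch of that round trip is below (illustration only: it just sets a flag and unprotects the whole page, and calling mprotect from a signal handler is a de facto emulator technique rather than something POSIX formally guarantees):

// Minimal write-protection fault-tracking demo (POSIX; illustration only).
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <sys/mman.h>
#include <unistd.h>

static std::uint8_t* g_page = nullptr;
static std::size_t g_page_size = 0;
static volatile std::sig_atomic_t g_fault_seen = 0;

static void FaultHandler(int /*sig*/, siginfo_t* info, void* /*ctx*/) {
    std::uint8_t* addr = static_cast<std::uint8_t*>(info->si_addr);
    if (addr >= g_page && addr < g_page + g_page_size) {
        g_fault_seen = 1; // a real tracker would invalidate the caches covering this page here
        mprotect(g_page, g_page_size, PROT_READ | PROT_WRITE); // let the faulting write retry
        return;
    }
    _exit(1); // fault outside the tracked region: give up
}

int main() {
    g_page_size = static_cast<std::size_t>(sysconf(_SC_PAGESIZE));
    g_page = static_cast<std::uint8_t*>(
        mmap(nullptr, g_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));

    struct sigaction sa{};
    sa.sa_flags = SA_SIGINFO;
    sa.sa_sigaction = FaultHandler;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, nullptr);
    sigaction(SIGBUS, &sa, nullptr); // Apple reports the write as SIGBUS, as noted in the diff

    mprotect(g_page, g_page_size, PROT_READ); // Protect(..., allow_write = false)
    g_page[0] = 0x42;                         // faults once; handler re-enables writes
    std::printf("fault_seen=%d value=%u\n", static_cast<int>(g_fault_seen),
                static_cast<unsigned>(g_page[0]));
    munmap(g_page, g_page_size);
    return 0;
}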
@@ -346,10 +346,14 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
     }
     for (const auto& buffer : stage.buffers) {
         const auto vsharp = buffer.GetVsharp(stage);
         if (vsharp) {
+            const VAddr address = vsharp.base_address;
+            if (texture_cache.IsMeta(address)) {
+                LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
+            }
             const u32 size = vsharp.GetSize();
-            const u32 alignment =
-                buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
+            const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment()
+                                                    : instance.UniformMinAlignment();
             const auto [vk_buffer, offset] =
                 buffer_cache.ObtainBuffer(address, size, buffer.is_written);
             const u32 offset_aligned = Common::AlignDown(offset, alignment);
@@ -359,6 +363,9 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                 push_data.AddOffset(binding, adjust);
             }
             buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
+        } else {
+            buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
+        }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
             .dstBinding = binding++,
@@ -368,10 +375,6 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                                   : vk::DescriptorType::eUniformBuffer,
             .pBufferInfo = &buffer_infos.back(),
         });
-
-        if (texture_cache.IsMeta(address)) {
-            LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
-        }
     }

     boost::container::static_vector<AmdGpu::Image, 16> tsharps;
@@ -204,7 +204,8 @@ bool Instance::CreateDevice() {
     // The next two extensions are required to be available together in order to support write masks
     color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
     color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
-    const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
+    const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
+    const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);

     // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
     // with extensions.
@@ -303,12 +304,19 @@ bool Instance::CreateDevice() {
             .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
             .workgroupMemoryExplicitLayout8BitAccess = true,
             .workgroupMemoryExplicitLayout16BitAccess = true,
-        }};
+        },
+        vk::PhysicalDeviceRobustness2FeaturesEXT{
+            .nullDescriptor = true,
+        },
+    };

     if (!color_write_en) {
         device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
         device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
     }
+    if (!robustness) {
+        device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
+    }

     try {
         device = physical_device.createDeviceUnique(device_chain.get());
@@ -5,7 +5,6 @@

 #include <tsl/robin_map.h>
 #include "shader_recompiler/ir/basic_block.h"
-#include "shader_recompiler/object_pool.h"
 #include "shader_recompiler/profile.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -51,8 +50,8 @@ private:
     Shader::Profile profile{};
     GraphicsPipelineKey graphics_key{};
     u64 compute_key{};
-    Shader::ObjectPool<Shader::IR::Inst> inst_pool;
-    Shader::ObjectPool<Shader::IR::Block> block_pool;
+    Common::ObjectPool<Shader::IR::Inst> inst_pool;
+    Common::ObjectPool<Shader::IR::Block> block_pool;
 };

 } // namespace Vulkan
@@ -23,6 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
         liverpool->BindRasterizer(this);
     }
     memory->SetRasterizer(this);
+    wfi_event = instance.GetDevice().createEventUnique({});
 }

 Rasterizer::~Rasterizer() = default;

@@ -60,6 +60,7 @@ private:
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     PipelineCache pipeline_cache;
+    vk::UniqueEvent wfi_event;
 };

 } // namespace Vulkan
@@ -300,11 +300,11 @@ void TextureCache::UnregisterImage(ImageId image_id) {
     image.flags &= ~ImageFlagBits::Registered;
     ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
         const auto page_it = page_table.find(page);
-        if (page_it == page_table.end()) {
-            ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
+        if (page_it == nullptr) {
+            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
             return;
         }
-        auto& image_ids = page_it.value();
+        auto& image_ids = *page_it;
         const auto vector_it = std::ranges::find(image_ids, image_id);
         if (vector_it == image_ids.end()) {
             ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);
@@ -9,6 +9,7 @@

 #include "common/slot_vector.h"
 #include "video_core/amdgpu/resource.h"
+#include "video_core/multi_level_page_table.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/texture_cache/image.h"
 #include "video_core/texture_cache/image_view.h"
@@ -25,12 +26,13 @@ class BufferCache;
 class PageManager;

 class TextureCache {
-    // This is the page shift for adding images into the hash map. It isn't related to
-    // the page size of the guest or the host and is chosen for convenience. A number too
-    // small will increase the number of hash map lookups per image, while too large will
-    // increase the number of images per page.
-    static constexpr u64 PageBits = 22;
-    static constexpr u64 PageMask = (1ULL << PageBits) - 1;
+    struct Traits {
+        using Entry = boost::container::small_vector<ImageId, 16>;
+        static constexpr size_t AddressSpaceBits = 39;
+        static constexpr size_t FirstLevelBits = 9;
+        static constexpr size_t PageBits = 22;
+    };
+    using PageTable = MultiLevelPageTable<Traits>;

 public:
     explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
@@ -97,8 +99,8 @@ private:
     template <typename Func>
     static void ForEachPage(PAddr addr, size_t size, Func&& func) {
         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
-        const u64 page_end = (addr + size - 1) >> PageBits;
-        for (u64 page = addr >> PageBits; page <= page_end; ++page) {
+        const u64 page_end = (addr + size - 1) >> Traits::PageBits;
+        for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
             if constexpr (RETURNS_BOOL) {
                 if (func(page)) {
                     break;
@@ -116,14 +118,14 @@ private:
         boost::container::small_vector<ImageId, 32> images;
         ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
             const auto it = page_table.find(page);
-            if (it == page_table.end()) {
+            if (it == nullptr) {
                 if constexpr (BOOL_BREAK) {
                     return false;
                 } else {
                     return;
                 }
             }
-            for (const ImageId image_id : it->second) {
+            for (const ImageId image_id : *it) {
                 Image& image = slot_images[image_id];
                 if (image.flags & ImageFlagBits::Picked) {
                     continue;
@@ -175,7 +177,7 @@ private:
     Common::SlotVector<Image> slot_images;
     Common::SlotVector<ImageView> slot_image_views;
     tsl::robin_map<u64, Sampler> samplers;
-    tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
+    PageTable page_table;
     boost::icl::interval_map<VAddr, s32> cached_pages;
     std::mutex mutex;
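ForEachPage above walks every page that an [addr, addr + size) range overlaps, with a page being 1 << 22 bytes (4 MiB) per the Traits. The same inclusive-end page walk in isolation:

// Sketch of the page walk used by ForEachPage (PageBits = 22, i.e. 4 MiB pages).
#include <cstdint>
#include <cstdio>

constexpr std::uint64_t PageBits = 22;

template <typename Func>
void ForEachPage(std::uint64_t addr, std::uint64_t size, Func&& func) {
    const std::uint64_t page_end = (addr + size - 1) >> PageBits; // last page, inclusive
    for (std::uint64_t page = addr >> PageBits; page <= page_end; ++page) {
        func(page);
    }
}

int main() {
    // A 6 MiB image that starts 1 MiB into a 4 MiB page touches exactly two pages.
    ForEachPage(0x40100000, 6ULL << 20, [](std::uint64_t page) {
        std::printf("page 0x%llx\n", static_cast<unsigned long long>(page));
    });
    return 0;
}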
@@ -202,12 +202,20 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     case vk::Format::eBc5UnormBlock:
     case vk::Format::eBc7SrgbBlock:
     case vk::Format::eBc7UnormBlock:
+    case vk::Format::eBc6HUfloatBlock:
+    case vk::Format::eR32G32B32A32Sfloat:
         return vk::Format::eR32G32B32A32Uint;
     default:
         break;
     }

+    // Log missing formats only once to avoid spamming the log.
+    static constexpr size_t MaxFormatIndex = 256;
+    static std::array<bool, MaxFormatIndex> logged_formats{};
+    if (const u32 index = u32(format); !logged_formats[index]) {
         LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
+        logged_formats[index] = true;
+    }
     return format;
 }
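The second half of that hunk is a small log-once guard: each unexpected format value is reported a single time and then suppressed. The pattern in isolation, assuming (as the diff's MaxFormatIndex = 256 does) that the enum values stay below a fixed bound:

// Log-once-per-value guard, standalone sketch.
#include <array>
#include <cstddef>
#include <cstdio>

enum class Format : unsigned { A, B, C };

void ReportUnexpected(Format fmt) {
    static constexpr std::size_t MaxIndex = 256;  // must bound every enum value we can receive
    static std::array<bool, MaxIndex> logged{};   // zero-initialized, lives for the whole run
    const auto index = static_cast<unsigned>(fmt);
    if (index < MaxIndex && !logged[index]) {
        std::fprintf(stderr, "Unexpected format %u\n", index); // fires once per distinct value
        logged[index] = true;
    }
}

int main() {
    ReportUnexpected(Format::B);
    ReportUnexpected(Format::B); // silent the second time
    return 0;
}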