video_core: Use multi level page table for caches

This commit is contained in:
IndecisiveTurtle 2024-08-07 19:44:10 +03:00
parent efcabbfd2a
commit b387ce9bbd
26 changed files with 229 additions and 101 deletions

View File

@ -283,6 +283,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/native_clock.h
src/common/path_util.cpp
src/common/path_util.h
src/common/object_pool.h
src/common/polyfill_thread.h
src/common/rdtsc.cpp
src/common/rdtsc.h
@ -368,7 +369,6 @@ set(CORE src/core/aerolib/stubs.cpp
)
set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/object_pool.h
src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp
src/shader_recompiler/recompiler.h
@ -457,7 +457,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/buffer_cache/buffer_cache.cpp
src/video_core/buffer_cache/buffer_cache.h
src/video_core/buffer_cache/memory_tracker_base.h
src/video_core/buffer_cache/range_set.cpp
src/video_core/buffer_cache/range_set.h
src/video_core/buffer_cache/word_manager.h
src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@ -507,6 +506,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/texture_cache/types.h
src/video_core/page_manager.cpp
src/video_core/page_manager.h
src/video_core/multi_level_page_table.h
src/video_core/renderdoc.cpp
src/video_core/renderdoc.h
)

View File

@ -8,7 +8,7 @@
#include <utility>
#include <vector>
namespace Shader {
namespace Common {
template <typename T>
requires std::is_destructible_v<T>
@ -104,4 +104,4 @@ private:
size_t new_chunk_size{};
};
} // namespace Shader
} // namespace Common

View File

@ -465,7 +465,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) {
int result = pthread_mutex_destroy(&(*mutex)->pth_mutex);
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
LOG_DEBUG(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
delete *mutex;
*mutex = nullptr;
@ -725,7 +725,7 @@ int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
}
int result = pthread_cond_destroy(&(*cond)->cond);
LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
LOG_DEBUG(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
delete *cond;
*cond = nullptr;
@ -811,8 +811,6 @@ int PS4_SYSV_ABI posix_pthread_cond_timedwait(ScePthreadCond* cond, ScePthreadMu
}
int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) {
LOG_INFO(Kernel_Pthread,
"posix posix_pthread_cond_broadcast redirect to scePthreadCondBroadcast");
int result = scePthreadCondBroadcast(cond);
if (result != 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -824,7 +822,6 @@ int PS4_SYSV_ABI posix_pthread_cond_broadcast(ScePthreadCond* cond) {
}
int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
int result = scePthreadMutexattrInit(attr);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -836,7 +833,6 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
}
int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit");
int result = scePthreadMutexattrSettype(attr, type);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -861,7 +857,6 @@ int PS4_SYSV_ABI posix_pthread_once(pthread_once_t* once_control, void (*init_ro
int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) {
int result = scePthreadMutexattrSetprotocol(attr, protocol);
LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", result);
if (result < 0) {
UNREACHABLE();
}
@ -1304,8 +1299,6 @@ int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int det
int PS4_SYSV_ABI posix_pthread_create_name_np(ScePthread* thread, const ScePthreadAttr* attr,
PthreadEntryFunc start_routine, void* arg,
const char* name) {
LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate: name = {}", name);
int result = scePthreadCreate(thread, attr, start_routine, arg, name);
if (result != 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
@ -1352,17 +1345,11 @@ int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadC
int PS4_SYSV_ABI posix_pthread_cond_signal(ScePthreadCond* cond) {
int result = scePthreadCondSignal(cond);
LOG_INFO(Kernel_Pthread,
"posix posix_pthread_cond_signal redirect to scePthreadCondSignal, result = {}",
result);
return result;
}
int PS4_SYSV_ABI posix_pthread_cond_destroy(ScePthreadCond* cond) {
int result = scePthreadCondDestroy(cond);
LOG_INFO(Kernel_Pthread,
"posix posix_pthread_cond_destroy redirect to scePthreadCondDestroy, result = {}",
result);
return result;
}

View File

@ -470,7 +470,7 @@ int PS4_SYSV_ABI scePadSetUserColor() {
}
int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) {
LOG_ERROR(Lib_Pad, "(STUBBED) called");
LOG_DEBUG(Lib_Pad, "(STUBBED) called");
return ORBIS_OK;
}
@ -665,4 +665,4 @@ void RegisterlibScePad(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("7xA+hFtvBCA", "libScePad", 1, "libScePad", 1, 1, Func_EF103E845B6F0420);
};
} // namespace Libraries::Pad
} // namespace Libraries::Pad

View File

@ -40,7 +40,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
}
}
CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
: block_pool{block_pool_}, inst_list{inst_list_} {
index_to_pc.resize(inst_list.size() + 1);
EmitLabels();

View File

@ -8,10 +8,10 @@
#include <boost/container/small_vector.hpp>
#include <boost/intrusive/set.hpp>
#include "common/object_pool.h"
#include "common/types.h"
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Gcn {
@ -49,7 +49,7 @@ class CFG {
using Label = u32;
public:
explicit CFG(ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
explicit CFG(Common::ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
[[nodiscard]] std::string Dot() const;
@ -59,7 +59,7 @@ private:
void LinkBlocks();
public:
ObjectPool<Block>& block_pool;
Common::ObjectPool<Block>& block_pool;
std::span<const GcnInst> inst_list;
std::vector<u32> index_to_pc;
boost::container::small_vector<Label, 16> labels;

View File

@ -1,10 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
namespace Shader::Gcn {
void Translate();
} // namespace Shader::Gcn

View File

@ -287,7 +287,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
*/
class GotoPass {
public:
explicit GotoPass(CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
explicit GotoPass(CFG& cfg, Common::ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
std::vector gotos{BuildTree(cfg)};
const auto end{gotos.rend()};
for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
@ -563,7 +563,7 @@ private:
return parent_tree.insert(std::next(loop), *new_goto);
}
ObjectPool<Statement>& pool;
Common::ObjectPool<Statement>& pool;
Statement root_stmt{FunctionTag{}};
};
@ -597,8 +597,9 @@ private:
class TranslatePass {
public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
TranslatePass(Common::ObjectPool<IR::Inst>& inst_pool_,
Common::ObjectPool<IR::Block>& block_pool_,
Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Info& info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
@ -808,9 +809,9 @@ private:
return block_pool.Create(inst_pool);
}
ObjectPool<Statement>& stmt_pool;
ObjectPool<IR::Inst>& inst_pool;
ObjectPool<IR::Block>& block_pool;
Common::ObjectPool<Statement>& stmt_pool;
Common::ObjectPool<IR::Inst>& inst_pool;
Common::ObjectPool<IR::Block>& block_pool;
IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{.is_dummy = true};
std::span<const GcnInst> inst_list;
@ -819,9 +820,10 @@ private:
};
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Info& info, const Profile& profile) {
ObjectPool<Statement> stmt_pool{64};
IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
const Profile& profile) {
Common::ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;

View File

@ -7,7 +7,6 @@
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader {
struct Info;
@ -16,8 +15,8 @@ struct Profile;
namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg,
[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
Info& info, const Profile& profile);
} // namespace Shader::Gcn

View File

@ -9,7 +9,7 @@
namespace Shader::IR {
Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
Block::Block(Common::ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
Block::~Block() = default;

View File

@ -9,10 +9,10 @@
#include <vector>
#include <boost/intrusive/list.hpp>
#include "common/object_pool.h"
#include "common/types.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::IR {
@ -25,7 +25,7 @@ public:
using reverse_iterator = InstructionList::reverse_iterator;
using const_reverse_iterator = InstructionList::const_reverse_iterator;
explicit Block(ObjectPool<Inst>& inst_pool_);
explicit Block(Common::ObjectPool<Inst>& inst_pool_);
~Block();
Block(const Block&) = delete;
@ -153,7 +153,7 @@ public:
private:
/// Memory pool for instruction list
ObjectPool<Inst>* inst_pool;
Common::ObjectPool<Inst>* inst_pool;
/// List of instructions in this block
InstructionList instructions;

View File

@ -433,6 +433,18 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
return;
}
if (IsLoadBufferFormat(inst)) {
if (UseFP16(buffer.GetDataFmt(), buffer.GetNumberFmt())) {
info.uses_fp16 = true;
}
} else {
const u32 stride = buffer.GetStride();
if (stride < 4) {
LOG_WARNING(Render_Vulkan,
"non-formatting load_buffer_* is not implemented for stride {}", stride);
}
}
// Compute address of the buffer using the stride.
// Todo: What if buffer is rebound with different stride?
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());

View File

@ -27,9 +27,9 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks;
}
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> token, const Info&& info,
const Profile& profile) {
IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
const Info&& info, const Profile& profile) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -45,7 +45,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
// Create control flow graph
ObjectPool<Gcn::Block> gcn_block_pool{64};
Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.

View File

@ -3,16 +3,16 @@
#pragma once
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader {
struct Profile;
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool,
[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, const Info&& info,
const Profile& profile);

View File

@ -13,7 +13,7 @@
#include "common/types.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/range_set.h"
#include "video_core/multi_level_page_table.h"
namespace AmdGpu {
struct Liverpool;
@ -37,6 +37,14 @@ public:
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
static constexpr u64 DEVICE_PAGESIZE = 4_KB;
/// Compile-time parameters handed to MultiLevelPageTable for the buffer page table.
struct Traits {
// Value stored for every page slot of the table.
using Entry = BufferId;
// Total number of address bits the table spans.
static constexpr size_t AddressSpaceBits = 39;
// Number of top bits resolved by the first level (the first level holds 1 << 14 slots).
static constexpr size_t FirstLevelBits = 14;
// Bits of an address below a page index; reuses the cache's own page granularity.
static constexpr size_t PageBits = CACHING_PAGEBITS;
};
// Page table type instantiated with the parameters above.
using PageTable = MultiLevelPageTable<Traits>;
struct OverlapResult {
boost::container::small_vector<BufferId, 16> ids;
VAddr begin;
@ -115,7 +123,7 @@ private:
std::recursive_mutex mutex;
Common::SlotVector<Buffer> slot_buffers;
MemoryTracker memory_tracker;
std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
PageTable page_table;
};
} // namespace VideoCore

View File

@ -0,0 +1,65 @@
// SPDX-FileCopyrightText: 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/object_pool.h"
#include "common/types.h"
namespace VideoCore {
/**
 * Sparse two-level table that maps a page index to an Entry.
 *
 * The page index is split in two: the upper FirstLevelBits select a slot in a directory
 * that is allocated up front, and the remaining SecondLevelBits select an entry inside a
 * second-level page. Second-level pages are only allocated when the mutable operator[]
 * first touches them, so untouched ranges cost a single null pointer each.
 *
 * Traits must provide:
 *  - Entry:            value type stored per page (must be default constructible),
 *  - AddressSpaceBits: total number of address bits covered,
 *  - FirstLevelBits:   number of bits resolved by the directory,
 *  - PageBits:         number of address bits below a page index.
 *
 * Callers pass page indices (address >> PageBits), not raw addresses.
 */
template <class Traits>
class MultiLevelPageTable final {
    using Entry = typename Traits::Entry;

    static constexpr size_t AddressSpaceBits = Traits::AddressSpaceBits;
    static constexpr size_t FirstLevelBits = Traits::FirstLevelBits;
    static constexpr size_t PageBits = Traits::PageBits;
    // Reject trait combinations that would make the subtractions below wrap around.
    static_assert(AddressSpaceBits >= FirstLevelBits + PageBits,
                  "FirstLevelBits + PageBits must not exceed AddressSpaceBits");
    static constexpr size_t FirstLevelShift = AddressSpaceBits - FirstLevelBits;
    static constexpr size_t SecondLevelBits = FirstLevelShift - PageBits;
    static constexpr size_t NumEntriesPerL1Page = 1ULL << SecondLevelBits;
    static constexpr size_t NumL1Pages = 1ULL << FirstLevelBits;

    using L1Page = std::array<Entry, NumEntriesPerL1Page>;

public:
    // Parentheses on purpose: this must select the (count, value) vector constructor and
    // never be read as an initializer list of two elements.
    explicit MultiLevelPageTable() : first_level_map(NumL1Pages, nullptr) {}

    ~MultiLevelPageTable() noexcept = default;

    /**
     * Looks up a page without allocating anything.
     * @param page Page index to look up.
     * @return Pointer to the entry, or nullptr when the second-level page holding it was
     *         never allocated or the index lies outside the table.
     */
    [[nodiscard]] Entry* find(size_t page) {
        const size_t l1_page = page >> SecondLevelBits;
        if (l1_page >= first_level_map.size()) {
            return nullptr;
        }
        L1Page* const second_level = first_level_map[l1_page];
        if (!second_level) {
            return nullptr;
        }
        return &(*second_level)[page & (NumEntriesPerL1Page - 1)];
    }

    /**
     * Read-only access. A const table cannot grow, so a page whose second-level storage
     * was never allocated (or that lies outside the table) yields a reference to a shared
     * default-constructed Entry instead of allocating.
     * @param page Page index to read.
     */
    [[nodiscard]] const Entry& operator[](size_t page) const {
        static const Entry empty_entry{};
        const size_t l1_page = page >> SecondLevelBits;
        if (l1_page >= first_level_map.size()) {
            return empty_entry;
        }
        const L1Page* const second_level = first_level_map[l1_page];
        if (!second_level) {
            return empty_entry;
        }
        return (*second_level)[page & (NumEntriesPerL1Page - 1)];
    }

    /**
     * Mutable access; allocates the second-level page on first use.
     * @param page Page index to access. Must be below 2^(AddressSpaceBits - PageBits);
     *             larger indices are not checked here.
     */
    [[nodiscard]] Entry& operator[](size_t page) {
        const size_t l1_page = page >> SecondLevelBits;
        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
        L1Page*& second_level = first_level_map[l1_page];
        if (!second_level) {
            // NOTE(review): assumes ObjectPool::Create() value-initializes the new page so
            // that untouched entries read as default-constructed - confirm.
            second_level = page_alloc.Create();
        }
        return (*second_level)[l2_page];
    }

private:
    /// Directory of second-level pages; a null slot means "never written".
    std::vector<L1Page*> first_level_map{};
    /// Pool the second-level pages are created from.
    Common::ObjectPool<L1Page> page_alloc;
};
} // namespace VideoCore

View File

@ -22,7 +22,44 @@ namespace VideoCore {
constexpr size_t PAGESIZE = 4_KB;
constexpr size_t PAGEBITS = 12;
#ifdef SHADPS4_USERFAULTFD
#ifdef _WIN64
/// Windows backend of the page manager: tracks guest writes by write-protecting pages and
/// catching the resulting access violations in a vectored exception handler.
struct PageManager::Impl {
    Impl(Vulkan::Rasterizer* rasterizer_) {
        rasterizer = rasterizer_;

        // A first argument of 0 appends the handler to the end of the vectored handler list.
        veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
        ASSERT_MSG(veh_handle, "Failed to register an exception handler");
    }

    /// Nothing to do on this backend when a range gets mapped.
    void OnMap(VAddr address, size_t size) {}

    /// Nothing to do on this backend when a range gets unmapped.
    void OnUnmap(VAddr address, size_t size) {}

    /**
     * Changes the host protection of a range.
     * @param address     Start of the range.
     * @param size        Length of the range in bytes.
     * @param allow_write True makes the range read/write, false makes it read-only.
     */
    void Protect(VAddr address, size_t size, bool allow_write) {
        // VirtualProtect takes Win32 PAGE_* constants; the POSIX PROT_* flags have
        // different values and must not be passed here.
        const DWORD prot = allow_write ? PAGE_READWRITE : PAGE_READONLY;
        DWORD old_prot{};
        const BOOL result = VirtualProtect(std::bit_cast<LPVOID>(address), size, prot, &old_prot);
        ASSERT_MSG(result != 0, "Region protection failed");
    }

    /**
     * Vectored exception handler. Write access violations are forwarded to the rasterizer
     * and the faulting instruction is resumed; everything else is passed on to the next
     * handler.
     */
    static LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
        const u32 ec = pExp->ExceptionRecord->ExceptionCode;
        if (ec == EXCEPTION_ACCESS_VIOLATION) {
            // For access violations: info[0] is the access type, info[1] the faulting address.
            const auto info = pExp->ExceptionRecord->ExceptionInformation;
            if (info[0] == 1) { // Write violation
                rasterizer->InvalidateMemory(info[1], sizeof(u64));
                return EXCEPTION_CONTINUE_EXECUTION;
            } /* else {
                UNREACHABLE();
            }*/
        }
        return EXCEPTION_CONTINUE_SEARCH; // pass further
    }

    /// Static because the exception handler is a free callback without a user pointer.
    inline static Vulkan::Rasterizer* rasterizer;
    /// Handle returned by AddVectoredExceptionHandler.
    void* veh_handle{};
};
#elif ENABLE_USERFAULTFD
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@ -121,9 +158,15 @@ struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
#ifdef __APPLE__
// Read-only memory write results in SIGBUS on Apple.
static constexpr int SignalType = SIGBUS;
#else
static constexpr int SignalType = SIGSEGV;
#endif
sigset_t signal_mask;
sigemptyset(&signal_mask);
sigaddset(&signal_mask, SIGSEGV);
sigaddset(&signal_mask, SignalType);
using HandlerType = decltype(sigaction::sa_sigaction);
@ -131,7 +174,7 @@ struct PageManager::Impl {
guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
guest_access_fault.sa_mask = signal_mask;
sigaction(SIGSEGV, &guest_access_fault, nullptr);
sigaction(SignalType, &guest_access_fault, nullptr);
}
void OnMap(VAddr address, size_t size) {}
@ -148,7 +191,7 @@ struct PageManager::Impl {
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
if (err & 0x2) {
rasterizer->InvalidateMemory(address, PAGESIZE);
rasterizer->InvalidateMemory(address, sizeof(u64));
} else {
// Read not supported!
UNREACHABLE();

View File

@ -346,19 +346,26 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
}
for (const auto& buffer : stage.buffers) {
const auto vsharp = buffer.GetVsharp(stage);
const VAddr address = vsharp.base_address;
const u32 size = vsharp.GetSize();
const u32 alignment =
buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, buffer.is_written);
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % 4 == 0);
push_data.AddOffset(binding, adjust);
if (vsharp) {
const VAddr address = vsharp.base_address;
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
}
const u32 size = vsharp.GetSize();
const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, buffer.is_written);
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
if (adjust != 0) {
ASSERT(adjust % 4 == 0);
push_data.AddOffset(binding, adjust);
}
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
} else {
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
}
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
@ -368,10 +375,6 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
}
}
boost::container::static_vector<AmdGpu::Image, 16> tsharps;

View File

@ -204,7 +204,8 @@ bool Instance::CreateDevice() {
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.
@ -303,12 +304,19 @@ bool Instance::CreateDevice() {
.workgroupMemoryExplicitLayoutScalarBlockLayout = true,
.workgroupMemoryExplicitLayout8BitAccess = true,
.workgroupMemoryExplicitLayout16BitAccess = true,
}};
},
vk::PhysicalDeviceRobustness2FeaturesEXT{
.nullDescriptor = true,
},
};
if (!color_write_en) {
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
}
if (!robustness) {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}
try {
device = physical_device.createDeviceUnique(device_chain.get());

View File

@ -5,7 +5,6 @@
#include <tsl/robin_map.h>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@ -51,8 +50,8 @@ private:
Shader::Profile profile{};
GraphicsPipelineKey graphics_key{};
u64 compute_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool;
Common::ObjectPool<Shader::IR::Inst> inst_pool;
Common::ObjectPool<Shader::IR::Block> block_pool;
};
} // namespace Vulkan

View File

@ -23,6 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
liverpool->BindRasterizer(this);
}
memory->SetRasterizer(this);
wfi_event = instance.GetDevice().createEventUnique({});
}
Rasterizer::~Rasterizer() = default;

View File

@ -60,6 +60,7 @@ private:
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache;
vk::UniqueEvent wfi_event;
};
} // namespace Vulkan

View File

@ -300,11 +300,11 @@ void TextureCache::UnregisterImage(ImageId image_id) {
image.flags &= ~ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
if (page_it == nullptr) {
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
return;
}
auto& image_ids = page_it.value();
auto& image_ids = *page_it;
const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);

View File

@ -9,6 +9,7 @@
#include "common/slot_vector.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/multi_level_page_table.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/image_view.h"
@ -25,12 +26,13 @@ class BufferCache;
class PageManager;
class TextureCache {
// This is the page shift for adding images into the hash map. It isn't related to
// the page size of the guest or the host and is chosen for convenience. A number too
// small will increase the number of hash map lookups per image, while too large will
// increase the number of images per page.
static constexpr u64 PageBits = 22;
static constexpr u64 PageMask = (1ULL << PageBits) - 1;
/// Compile-time parameters handed to MultiLevelPageTable for the image page table.
struct Traits {
// Each page slot holds the ids of the images registered on that page.
using Entry = boost::container::small_vector<ImageId, 16>;
// Total number of address bits the table spans.
static constexpr size_t AddressSpaceBits = 39;
// Number of top bits resolved by the first level (the first level holds 1 << 9 slots).
static constexpr size_t FirstLevelBits = 9;
// Shift that turns an address into a page index (see ForEachPage). It is a bucketing
// granularity for images and is not tied to the guest or host page size.
static constexpr size_t PageBits = 22;
};
// Page table type instantiated with the parameters above.
using PageTable = MultiLevelPageTable<Traits>;
public:
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
@ -97,8 +99,8 @@ private:
template <typename Func>
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PageBits;
for (u64 page = addr >> PageBits; page <= page_end; ++page) {
const u64 page_end = (addr + size - 1) >> Traits::PageBits;
for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
@ -116,14 +118,14 @@ private:
boost::container::small_vector<ImageId, 32> images;
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
if (it == nullptr) {
if constexpr (BOOL_BREAK) {
return false;
} else {
return;
}
}
for (const ImageId image_id : it->second) {
for (const ImageId image_id : *it) {
Image& image = slot_images[image_id];
if (image.flags & ImageFlagBits::Picked) {
continue;
@ -175,7 +177,7 @@ private:
Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageView> slot_image_views;
tsl::robin_map<u64, Sampler> samplers;
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
PageTable page_table;
boost::icl::interval_map<VAddr, s32> cached_pages;
std::mutex mutex;

View File

@ -202,12 +202,20 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eR32G32B32A32Sfloat:
return vk::Format::eR32G32B32A32Uint;
default:
break;
}
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
// Log missing formats only once to avoid spamming the log.
static constexpr size_t MaxFormatIndex = 256;
static std::array<bool, MaxFormatIndex> logged_formats{};
if (const u32 index = u32(format); !logged_formats[index]) {
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
logged_formats[index] = true;
}
return format;
}