shadPS4/src/video_core/buffer_cache/buffer.cpp

228 lines
8.1 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include <vk_mem_alloc.h>
namespace VideoCore {
constexpr vk::BufferUsageFlags AllFlags =
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer |
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer;
std::string_view BufferTypeName(MemoryUsage type) {
switch (type) {
case MemoryUsage::Upload:
return "Upload";
case MemoryUsage::Download:
return "Download";
case MemoryUsage::Stream:
return "Stream";
case MemoryUsage::DeviceLocal:
return "DeviceLocal";
default:
return "Invalid";
}
}
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) {
return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
: VkMemoryPropertyFlagBits{};
}
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
switch (usage) {
case MemoryUsage::Upload:
case MemoryUsage::Stream:
return VMA_ALLOCATION_CREATE_MAPPED_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
case MemoryUsage::Download:
return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
case MemoryUsage::DeviceLocal:
return {};
}
return {};
}
[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
switch (usage) {
case MemoryUsage::DeviceLocal:
case MemoryUsage::Stream:
return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
case MemoryUsage::Upload:
case MemoryUsage::Download:
return VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
}
return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
}
UniqueBuffer::UniqueBuffer(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}
UniqueBuffer::~UniqueBuffer() {
if (buffer) {
vmaDestroyBuffer(allocator, buffer, allocation);
}
}
void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usage,
VmaAllocationInfo* out_alloc_info) {
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
};
const VkBufferCreateInfo buffer_ci_unsafe = static_cast<VkBufferCreateInfo>(buffer_ci);
VkBuffer unsafe_buffer{};
VkResult result = vmaCreateBuffer(allocator, &buffer_ci_unsafe, &alloc_ci, &unsafe_buffer,
&allocation, out_alloc_info);
ASSERT_MSG(result == VK_SUCCESS, "Failed allocating buffer with error {}",
vk::to_string(vk::Result{result}));
buffer = vk::Buffer{unsafe_buffer};
}
Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_addr_,
u64 size_bytes_)
: cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, usage{usage_},
buffer{instance->GetDevice(), instance->GetAllocator()} {
// Create buffer object.
const vk::BufferCreateInfo buffer_ci = {
.size = size_bytes,
.usage = AllFlags,
};
VmaAllocationInfo alloc_info{};
buffer.Create(buffer_ci, usage, &alloc_info);
if (instance->HasDebuggingToolAttached()) {
const auto device = instance->GetDevice();
Vulkan::SetObjectName(device, Handle(), "Buffer {:#x} {} KiB", cpu_addr, size_bytes / 1024);
}
// Map it if it is host visible.
VkMemoryPropertyFlags property_flags{};
vmaGetAllocationMemoryProperties(instance->GetAllocator(), buffer.allocation, &property_flags);
if (alloc_info.pMappedData) {
mapped_data = std::span<u8>{std::bit_cast<u8*>(alloc_info.pMappedData), size_bytes};
}
is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
vk::BufferView Buffer::View(u32 offset, u32 size, AmdGpu::DataFormat dfmt,
AmdGpu::NumberFormat nfmt) {
const auto it{std::ranges::find_if(views, [offset, size, dfmt, nfmt](const BufferView& view) {
return offset == view.offset && size == view.size && dfmt == view.dfmt && nfmt == view.nfmt;
})};
if (it != views.end()) {
return it->handle;
}
views.push_back({
.offset = offset,
.size = size,
.dfmt = dfmt,
.nfmt = nfmt,
.handle = instance->GetDevice().createBufferView({
.buffer = buffer.buffer,
.format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt),
.offset = offset,
.range = size,
}),
});
return views.back().handle;
}
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_,
MemoryUsage usage, u64 size_bytes)
: Buffer{instance, usage, 0, size_bytes}, scheduler{scheduler_} {
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
const auto device = instance.GetDevice();
if (instance.HasDebuggingToolAttached()) {
Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}): {} KiB", BufferTypeName(usage),
size_bytes / 1024);
}
}
std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment) {
if (!is_coherent && usage == MemoryUsage::Stream) {
size = Common::AlignUp(size, instance->NonCoherentAtomSize());
}
ASSERT(size <= this->size_bytes);
mapped_size = size;
if (alignment > 0) {
offset = Common::AlignUp(offset, alignment);
}
if (offset + size > this->size_bytes) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
}
const u64 mapped_upper_bound = offset + size;
WaitPendingOperations(mapped_upper_bound);
return std::make_pair(mapped_data.data() + offset, offset);
}
void StreamBuffer::Commit() {
if (!is_coherent) {
if (usage == MemoryUsage::Download) {
vmaInvalidateAllocation(instance->GetAllocator(), buffer.allocation, offset,
mapped_size);
} else {
vmaFlushAllocation(instance->GetAllocator(), buffer.allocation, offset, mapped_size);
}
}
offset += mapped_size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick();
}
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
++wait_cursor;
}
}
} // namespace VideoCore