// shadPS4/src/video_core/texture_cache/image.cpp

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/tile_manager.h"
#include <vk_mem_alloc.h>
namespace VideoCore {
using namespace Vulkan;
using VideoOutFormat = Libraries::VideoOut::PixelFormat;
using Libraries::VideoOut::TilingMode;
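// Maps a VideoOut scanout pixel format to the Vulkan format used for the backing image.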
static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
switch (format) {
case VideoOutFormat::A8R8G8B8Srgb:
return vk::Format::eB8G8R8A8Srgb;
case VideoOutFormat::A8B8G8R8Srgb:
return vk::Format::eR8G8B8A8Srgb;
case VideoOutFormat::A2R10G10B10:
case VideoOutFormat::A2R10G10B10Srgb:
return vk::Format::eA2R10G10B10UnormPack32;
default:
break;
}
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
return {};
}
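// True for the BCn block-compressed formats; ImageUsageFlags() below never requests color
// attachment usage for such images.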
bool ImageInfo::IsBlockCoded() const {
switch (pixel_format) {
case vk::Format::eBc1RgbaSrgbBlock:
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc1RgbSrgbBlock:
case vk::Format::eBc1RgbUnormBlock:
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc3SrgbBlock:
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc4SnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eBc5SnormBlock:
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc6HSfloatBlock:
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
return true;
default:
return false;
}
}
bool ImageInfo::IsPacked() const {
switch (pixel_format) {
    case vk::Format::eB5G5R5A1UnormPack16:
    case vk::Format::eB5G6R5UnormPack16:
return true;
default:
return false;
}
}
bool ImageInfo::IsDepthStencil() const {
switch (pixel_format) {
case vk::Format::eD16Unorm:
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD32Sfloat:
case vk::Format::eD32SfloatS8Uint:
return true;
default:
return false;
}
}
static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eSampled;
if (info.IsDepthStencil()) {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
if (!info.IsBlockCoded() && !info.IsPacked()) {
usage |= vk::ImageUsageFlagBits::eColorAttachment;
}
}
    // In cases where an image is created as a render/depth target and cleared with compute,
    // we cannot predict whether it will later be used as a storage image. A proper solution
    // would involve re-creating the resource with a new configuration and copying the previous
    // contents into it. For now, storage usage is requested for all images (where the format
    // allows it), sacrificing a bit of performance. Note that the ExtendedUsage create flag is
    // always set on created images.
usage |= vk::ImageUsageFlagBits::eStorage;
return usage;
}
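// Translates the guest image dimensionality to the corresponding Vulkan image type; cube maps
// and 2D arrays are backed by layered 2D images.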
static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
switch (type) {
case AmdGpu::ImageType::Color1D:
case AmdGpu::ImageType::Color1DArray:
return vk::ImageType::e1D;
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Cube:
case AmdGpu::ImageType::Color2DArray:
return vk::ImageType::e2D;
case AmdGpu::ImageType::Color3D:
return vk::ImageType::e3D;
default:
UNREACHABLE();
}
}
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept {
const auto& attrib = group.attrib;
is_tiled = attrib.tiling_mode == TilingMode::Tile;
tiling_mode =
is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear;
pixel_format = ConvertPixelFormat(attrib.pixel_format);
type = vk::ImageType::e2D;
size.width = attrib.width;
size.height = attrib.height;
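    // Tiled scanout surfaces have their pitch aligned up to 128 pixels.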
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127);
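    // Only 32-bit-per-pixel scanout formats are handled here; the size computation below
    // assumes 4 bytes per pixel.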
const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float;
ASSERT(is_32bpp);
    usage.vo_buffer = true;
    if (!is_tiled) {
        guest_size_bytes = pitch * size.height * 4;
        return;
    }
    // Tiled surfaces are additionally height-aligned: to 128 rows in Neo (PS4 Pro) mode and to
    // 64 rows otherwise.
    if (Config::isNeoMode()) {
        guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4;
    } else {
        guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4;
    }
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
is_tiled = buffer.IsTiled();
tiling_mode = buffer.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
num_samples = 1 << buffer.attrib.num_fragments_log2;
type = vk::ImageType::e2D;
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();
size.depth = 1;
pitch = size.width;
guest_size_bytes = buffer.GetSizeAligned();
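    // CMASK/FMASK metadata is only tracked when fast clear or compression is enabled.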
meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0;
meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0;
usage.render_target = true;
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
is_tiled = false;
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = vk::ImageType::e2D;
    num_samples = 1 << buffer.z_info.num_samples; // treated as a log2; the spec does not say so
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();
size.depth = 1;
pitch = size.width;
guest_size_bytes = buffer.GetSizeAligned();
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
usage.depth_target = true;
}
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
is_tiled = image.IsTiled();
tiling_mode = image.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
type = ConvertImageType(image.GetType());
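    // The image descriptor stores width and height minus one.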
size.width = image.width + 1;
size.height = image.height + 1;
size.depth = 1;
pitch = image.Pitch();
resources.levels = image.NumLevels();
resources.layers = image.NumLayers();
guest_size_bytes = image.GetSizeAligned();
usage.texture = true;
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
: device{device_}, allocator{allocator_} {}
UniqueImage::~UniqueImage() {
if (image) {
vmaDestroyImage(allocator, image, allocation);
}
}
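// Creates the image and allocates device-local memory for it in a single VMA call.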
void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
const VmaAllocationCreateInfo alloc_info = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = 0,
.preferredFlags = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
};
const VkImageCreateInfo image_ci_unsafe = static_cast<VkImageCreateInfo>(image_ci);
VkImage unsafe_image{};
VkResult result = vmaCreateImage(allocator, &image_ci_unsafe, &alloc_info, &unsafe_image,
&allocation, nullptr);
ASSERT_MSG(result == VK_SUCCESS, "Failed allocating image with error {}",
vk::to_string(vk::Result{result}));
image = vk::Image{unsafe_image};
}
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const ImageInfo& info_, VAddr cpu_addr)
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
ASSERT(info.pixel_format != vk::Format::eUndefined);
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
vk::ImageCreateFlagBits::eExtendedUsage};
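    // The guest may later view this image as a cube map, a 2D array, or with a different but
    // compatible format, so request the corresponding compatibility flags up front.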
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
info.size.width == info.size.height) {
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
}
if (info.type == vk::ImageType::e3D) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}
if (info.IsBlockCoded()) {
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
}
usage = ImageUsageFlags(info);
    // Derive the aspect mask from the pixel format; combined depth-stencil formats carry both
    // aspects.
    if (info.IsDepthStencil()) {
        aspect_mask = vk::ImageAspectFlagBits::eDepth;
        if (info.pixel_format == vk::Format::eD16UnormS8Uint ||
            info.pixel_format == vk::Format::eD32SfloatS8Uint) {
            aspect_mask |= vk::ImageAspectFlagBits::eStencil;
        }
    }
const vk::ImageCreateInfo image_ci = {
.flags = flags,
.imageType = info.type,
.format = info.pixel_format,
.extent{
.width = info.size.width,
.height = info.size.height,
.depth = info.size.depth,
},
.mipLevels = static_cast<u32>(info.resources.levels),
.arrayLayers = static_cast<u32>(info.resources.layers),
.samples = LiverpoolToVK::NumSamples(info.num_samples),
.tiling = vk::ImageTiling::eOptimal,
.usage = usage,
.initialLayout = vk::ImageLayout::eUndefined,
};
image.Create(image_ci);
// Create a special view for detiler
if (info.is_tiled) {
ImageViewInfo view_info;
view_info.format = DemoteImageFormatForDetiling(info.pixel_format);
view_for_detiler.emplace(*instance, view_info, *this, ImageId{});
}
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
}
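// Records a whole-image layout/access transition; this is a no-op when the image is already in
// the requested state. Typical usage (see Upload() below): transition to TransferDst before a
// copy, then back to General for shader access.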
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
vk::CommandBuffer cmdbuf) {
if (dst_layout == layout && dst_mask == access_mask) {
return;
}
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = access_mask,
.dstAccessMask = dst_mask,
.oldLayout = layout,
.newLayout = dst_layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
    // Choose the destination pipeline stage: transfer accesses map to the transfer stage,
    // everything else is conservatively synchronized against all graphics and compute stages.
const vk::PipelineStageFlags dst_pl_stage =
(dst_mask == vk::AccessFlagBits::eTransferRead ||
dst_mask == vk::AccessFlagBits::eTransferWrite)
? vk::PipelineStageFlagBits::eTransfer
: vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader;
    if (!cmdbuf) {
        // No external command buffer was provided, so end the current render pass and record
        // the barrier on the scheduler's command buffer. Callers that pass their own cmdbuf
        // are responsible for ending the render pass themselves.
        scheduler->EndRendering();
        cmdbuf = scheduler->CommandBuffer();
    }
cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {},
barrier);
layout = dst_layout;
access_mask = dst_mask;
pl_stage = dst_pl_stage;
}
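// Copies linear texel data from a staging buffer into the first mip level and array layer, then
// returns the image to the general layout for shader access.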
void Image::Upload(vk::Buffer buffer, u64 offset) {
scheduler->EndRendering();
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = info.pitch,
.bufferImageHeight = info.size.height,
.imageSubresource{
.aspectMask = aspect_mask,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {info.size.width, info.size.height, 1},
};
const auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy);
Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
}
Image::~Image() = default;
} // namespace VideoCore