Surface management rework (2/3) (#329)
* texture_cache: interface refactoring * a few fixes and improvements * texture_cache: macro tile extents for bpp 128 * texture_cache: detiler: prefer host memory for large buffer uploads
This commit is contained in:
parent
0d6edaa0a0
commit
30198d5ffc
|
@ -377,9 +377,13 @@ struct Liverpool {
|
|||
return 1u << z_info.num_samples; // spec doesn't say it is a log2
|
||||
}
|
||||
|
||||
// Width of one depth element in bits.
// Yields 32 when z_info.format is ZFormat::Z32Float and 16 for every other
// format value; no other enumerator is distinguished here.
u32 NumBits() const {
|
||||
return z_info.format == ZFormat::Z32Float ? 32 : 16;
|
||||
}
|
||||
|
||||
// Size in bytes of a single depth slice, computed as
// (depth_slice.tile_max + 1) * 64 * bytes-per-element * NumSamples().
// Asserts that z_info.format is not ZFormat::Invalid before computing.
// NOTE(review): the factor 64 presumably is the element count of one 8x8 tile - confirm.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). `bpe` is declared twice below; the first declaration is presumably the
// superseded line and the second its replacement - confirm against the real file.
size_t GetDepthSliceSize() const {
|
||||
ASSERT(z_info.format != ZFormat::Invalid);
|
||||
// Bytes per element chosen directly from the format: 4 for Z32Float, otherwise 2.
const auto bpe = z_info.format == ZFormat::Z32Float ? 4 : 2;
|
||||
// Same quantity derived from NumBits(): bit width divided by 8.
const auto bpe = NumBits() >> 3; // in bytes
|
||||
return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
|||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(r32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint out_data[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint num_levels;
|
||||
uint pitch;
|
||||
uint sizes[14];
|
||||
} info;
|
||||
|
||||
// Inverse morton LUT, small enough to fit into K$
|
||||
|
@ -31,20 +35,22 @@ uint rmort[16] = {
|
|||
#define TEXELS_PER_ELEMENT (1)
|
||||
|
||||
// Shader entry point. Each invocation reads the uint at in_data[gl_GlobalInvocationID.x],
// decodes a (col, row) position from the rmort lookup table using its local invocation
// index, and stores the value at a destination derived from gl_WorkGroupID.x.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). Two variants are interleaved: one writes through imageStore(output_img, ...),
// the other writes to out_data[]. Where a name is declared twice, the first occurrence is
// presumably the superseded line and the second its replacement - confirm against the real file.
void main() {
|
||||
uint tile_base = gl_GlobalInvocationID.x - gl_LocalInvocationID.x; // WG*16
|
||||
uint p0 = in_data[gl_GlobalInvocationID.x];
|
||||
// Four positions are packed per rmort entry, one byte each: entry = local id / 4,
// byte = local id % 4.
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||
// Within the selected byte: bits 4..7 are the column, bits 0..3 are the row.
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||
|
||||
uint p0 = in_data[gl_GlobalInvocationID.x];
|
||||
// mip = number of info.sizes[0 .. num_levels) entries that this invocation's byte
// offset (global id * 4) has reached or passed.
uint mip = 0;
|
||||
for (int m = 0; m < info.num_levels; ++m) {
|
||||
mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0;
|
||||
}
|
||||
|
||||
uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
|
||||
// Pitch shifted right by the mip index, expressed in micro tiles, never less than 1.
uint tiles_per_pitch = max((info.pitch >> mip) / MICRO_TILE_DIM, 1);
|
||||
// The work group index is split into a tile column and a tile row.
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
|
||||
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
|
||||
|
||||
// Image-store variant: 2D coordinates inside the destination image.
uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col;
|
||||
uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
|
||||
|
||||
ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
|
||||
imageStore(output_img, img_pos, uvec4(p0, 0, 0, 0));
|
||||
// Buffer-store variant: dw_ofs_y is already a linear offset in uints. One row advances by
// tiles_per_pitch * MICRO_TILE_DIM and one tile row by tiles_per_pitch * 64.
uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col;
|
||||
uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * MICRO_TILE_DIM;
|
||||
out_data[dw_ofs_x + dw_ofs_y] = p0;
|
||||
}
|
|
@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
|||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(rg32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint out_data[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint num_levels;
|
||||
uint pitch;
|
||||
uint sizes[14];
|
||||
} info;
|
||||
|
||||
// Inverse morton LUT, small enough to fit into K$
|
||||
|
@ -30,19 +34,25 @@ uint rmort[16] = {
|
|||
#define MICRO_TILE_DIM (8)
|
||||
|
||||
// Shader entry point. Each invocation reads two consecutive uints starting at
// in_data[2 * gl_GlobalInvocationID.x], decodes a (col, row) position from the rmort lookup
// table using its local invocation index, and stores the pair at a destination derived
// from gl_WorkGroupID.x.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). Two variants are interleaved: one writes through imageStore(output_img, ...),
// the other writes to out_data[]. Where a name is declared twice, the first occurrence is
// presumably the superseded line and the second its replacement - confirm against the real file.
void main() {
|
||||
uint block_ofs = 2 * gl_GlobalInvocationID.x;
|
||||
uint p0 = in_data[block_ofs + 0];
|
||||
uint p1 = in_data[block_ofs + 1];
|
||||
|
||||
// Four positions are packed per rmort entry, one byte each: entry = local id / 4,
// byte = local id % 4.
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||
// Within the selected byte: bits 4..7 are the column, bits 0..3 are the row.
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||
|
||||
uint block_ofs = 2 * gl_GlobalInvocationID.x;
|
||||
uint p0 = in_data[block_ofs + 0];
|
||||
uint p1 = in_data[block_ofs + 1];
|
||||
// mip = number of info.sizes[0 .. num_levels) entries that this invocation's byte
// offset (global id * 8, i.e. two uints per invocation) has reached or passed.
uint mip = 0;
|
||||
for (int m = 0; m < info.num_levels; ++m) {
|
||||
mip += (gl_GlobalInvocationID.x * 8) >= info.sizes[m] ? 1 : 0;
|
||||
}
|
||||
|
||||
// Image-store variant: tile origin in texels plus (col, row) inside the tile.
uint tiles_per_pitch = (info.pitch >> 3) >> 2; // log2(MICRO_TILE_DIM) / 4
|
||||
ivec2 img_pos = MICRO_TILE_DIM * ivec2(
|
||||
gl_WorkGroupID.x % tiles_per_pitch,
|
||||
gl_WorkGroupID.x / tiles_per_pitch
|
||||
);
|
||||
imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, 0, 0));
|
||||
// Buffer-store variant. Pitch is shifted right by the mip index, expressed in micro tiles
// and clamped to at least 1, then doubled.
// NOTE(review): the factor 2 presumably accounts for two uints per element - confirm.
uint tiles_per_pitch = max((info.pitch >> mip) / MICRO_TILE_DIM, 1) * 2;
|
||||
// `*`, `%` and `/` share precedence and associate left to right, so these evaluate as
// (2 * gl_WorkGroupID.x) % tiles_per_pitch and (2 * gl_WorkGroupID.x) / tiles_per_pitch.
uint target_tile_x = 2 * gl_WorkGroupID.x % tiles_per_pitch;
|
||||
uint target_tile_y = 2 * gl_WorkGroupID.x / tiles_per_pitch;
|
||||
uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col * 2;
|
||||
uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * MICRO_TILE_DIM;
|
||||
// The two uints are written to adjacent slots of the output buffer.
out_data[dw_ofs_x + dw_ofs_y] = p0;
|
||||
out_data[dw_ofs_x + dw_ofs_y + 1] = p1;
|
||||
}
|
|
@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
|||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(rgba32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint out_data[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint num_levels;
|
||||
uint pitch;
|
||||
uint sizes[14];
|
||||
} info;
|
||||
|
||||
// Inverse morton LUT, small enough to fit into K$
|
||||
|
@ -30,21 +34,29 @@ uint rmort[16] = {
|
|||
#define MICRO_TILE_DIM (8)
|
||||
|
||||
// Shader entry point. Each invocation reads four consecutive uints starting at
// in_data[4 * gl_GlobalInvocationID.x], decodes a (col, row) position from the rmort lookup
// table using its local invocation index, and stores the four values at a destination
// derived from gl_WorkGroupID.x.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). Two variants are interleaved: one writes through imageStore(output_img, ...),
// the other writes to out_data[]. Statements that appear twice (the position decode below)
// are presumably a superseded line and its replacement - confirm against the real file.
void main() {
|
||||
// Four positions are packed per rmort entry, one byte each: entry = local id / 4,
// byte = local id % 4. Bits 4..7 of the byte are the column, bits 0..3 the row.
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||
|
||||
uint block_ofs = 4 * gl_GlobalInvocationID.x;
|
||||
uint p0 = in_data[block_ofs + 0];
|
||||
uint p1 = in_data[block_ofs + 1];
|
||||
uint p2 = in_data[block_ofs + 2];
|
||||
uint p3 = in_data[block_ofs + 3];
|
||||
|
||||
// Image-store variant: tile origin in texels plus (col, row) inside the tile.
uint tiles_per_pitch = (info.pitch >> 3) >> 2; // log2(MICRO_TILE_DIM) / 4
|
||||
ivec2 img_pos = MICRO_TILE_DIM * ivec2(
|
||||
gl_WorkGroupID.x % tiles_per_pitch,
|
||||
gl_WorkGroupID.x / tiles_per_pitch
|
||||
);
|
||||
imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, p2, p3));
|
||||
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||
|
||||
// mip = number of info.sizes[0 .. num_levels) entries that this invocation's byte
// offset (global id * 16, i.e. four uints per invocation) has reached or passed.
uint mip = 0;
|
||||
for (int m = 0; m < info.num_levels; ++m) {
|
||||
mip += (gl_GlobalInvocationID.x * 16) >= info.sizes[m] ? 1 : 0;
|
||||
}
|
||||
|
||||
// Buffer-store variant. Pitch is shifted right by the mip index, expressed in micro tiles
// and clamped to at least 1, then multiplied by 4.
// NOTE(review): the factor 4 presumably accounts for four uints per element - confirm.
uint tiles_per_pitch = max(((info.pitch >> mip) / MICRO_TILE_DIM), 1u) * 4;
|
||||
// `*`, `%` and `/` share precedence and associate left to right, so these evaluate as
// (4 * gl_WorkGroupID.x) % tiles_per_pitch and (4 * gl_WorkGroupID.x) / tiles_per_pitch.
uint target_tile_x = 4 * gl_WorkGroupID.x % tiles_per_pitch;
|
||||
uint target_tile_y = 4 * gl_WorkGroupID.x / tiles_per_pitch;
|
||||
uint dw_ofs_x = (target_tile_x * MICRO_TILE_DIM) + 4 * col;
|
||||
uint dw_ofs_y = ((target_tile_y * tiles_per_pitch) * 64u) + ((row * tiles_per_pitch) * MICRO_TILE_DIM);
|
||||
// The four uints are written to adjacent slots of the output buffer.
out_data[dw_ofs_x + dw_ofs_y] = p0;
|
||||
out_data[dw_ofs_x + dw_ofs_y + 1] = p1;
|
||||
out_data[dw_ofs_x + dw_ofs_y + 2] = p2;
|
||||
out_data[dw_ofs_x + dw_ofs_y + 3] = p3;
|
||||
}
|
|
@ -11,10 +11,14 @@ layout (local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
|
|||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(r8ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint out_data[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint num_levels;
|
||||
uint pitch;
|
||||
uint sizes[14];
|
||||
} info;
|
||||
|
||||
#define MICRO_TILE_DIM 8
|
||||
|
@ -32,17 +36,15 @@ void main() {
|
|||
uint row = (gl_LocalInvocationID.x % TEXELS_PER_ELEMENT)
|
||||
+ TEXELS_PER_ELEMENT * (gl_LocalInvocationID.x >> 3);
|
||||
|
||||
uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
|
||||
uint mip = 0;
|
||||
for (int m = 0; m < info.num_levels; ++m) {
|
||||
mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0;
|
||||
}
|
||||
|
||||
uint tiles_per_pitch = max((info.pitch >> mip) / 8, 1);
|
||||
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
|
||||
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
|
||||
uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col;
|
||||
uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
|
||||
|
||||
ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
|
||||
|
||||
#pragma unroll
|
||||
for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) {
|
||||
imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(dst_tx & 0xff));
|
||||
dst_tx >>= 8;
|
||||
}
|
||||
uint dw_ofs_x = target_tile_x * 2 + col; // 2 = uints
|
||||
uint dw_ofs_y = (target_tile_y * MICRO_TILE_DIM + row) * tiles_per_pitch * 2; // 2 = uints
|
||||
out_data[dw_ofs_x + dw_ofs_y] = dst_tx;
|
||||
}
|
|
@ -10,10 +10,14 @@ layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
|
|||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(rg8ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint out_data[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint num_levels;
|
||||
uint pitch;
|
||||
uint sizes[14];
|
||||
} info;
|
||||
|
||||
#define MICRO_TILE_DIM 8
|
||||
|
@ -44,18 +48,14 @@ void main() {
|
|||
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||
|
||||
uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
|
||||
uint mip = 0u;
|
||||
for (int m = 0; m < info.num_levels; ++m) {
|
||||
mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0;
|
||||
}
|
||||
uint tiles_per_pitch = max(((info.pitch >> mip) / 8u), 1u);
|
||||
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
|
||||
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
|
||||
uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col;
|
||||
uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
|
||||
|
||||
ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
|
||||
|
||||
#pragma unroll
|
||||
for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) {
|
||||
uint p0 = (p[ofs] >> 8) & 0xff;
|
||||
uint p1 = p[ofs] & 0xff;
|
||||
imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p1, p0, 0, 0));
|
||||
}
|
||||
uint dw_ofs_x = target_tile_x * 8 + col;
|
||||
uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * 8;
|
||||
out_data[(dw_ofs_x + dw_ofs_y) / 2] = src_tx;
|
||||
}
|
||||
|
|
|
@ -392,6 +392,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eR16G16Sfloat;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eR16G16Unorm;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format10_11_11 &&
|
||||
num_format == AmdGpu::NumberFormat::Float) {
|
||||
return vk::Format::eB10G11R11UfloatPack32;
|
||||
|
|
|
@ -128,7 +128,9 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
|||
for (const auto& image_desc : info.images) {
|
||||
const auto tsharp =
|
||||
info.ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
|
||||
const auto& image_view = texture_cache.FindTexture(tsharp, image_desc.is_storage);
|
||||
VideoCore::ImageInfo image_info{tsharp};
|
||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
||||
set_writes.push_back({
|
||||
|
|
|
@ -366,7 +366,9 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||
for (const auto& image_desc : stage.images) {
|
||||
const auto& tsharp = tsharps.emplace_back(
|
||||
stage.ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset));
|
||||
const auto& image_view = texture_cache.FindTexture(tsharp, image_desc.is_storage);
|
||||
VideoCore::ImageInfo image_info{tsharp};
|
||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
||||
set_writes.push_back({
|
||||
|
|
|
@ -191,7 +191,7 @@ void PipelineCache::RefreshGraphicsKey() {
|
|||
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
|
||||
const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf);
|
||||
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
|
||||
base_format, col_buf.info.comp_swap.Value(), is_vo_surface);
|
||||
base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/);
|
||||
key.blend_controls[remapped_cb] = regs.blend_control[cb];
|
||||
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
||||
!col_buf.info.blend_bypass);
|
||||
|
|
|
@ -32,6 +32,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
|
|||
switch (static_cast<u32>(callback_data->messageIdNumber)) {
|
||||
case 0x609a13b: // Vertex attribute at location not consumed by shader
|
||||
case 0xc81ad50e:
|
||||
case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error)
|
||||
return VK_FALSE;
|
||||
default:
|
||||
break;
|
||||
|
|
|
@ -120,7 +120,9 @@ void Rasterizer::BeginRendering() {
|
|||
}
|
||||
|
||||
const auto& hint = liverpool->last_cb_extent[col_buf_id];
|
||||
const auto& image_view = texture_cache.FindRenderTarget(col_buf, hint);
|
||||
VideoCore::ImageInfo image_info{col_buf, hint};
|
||||
VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/};
|
||||
const auto& image_view = texture_cache.FindRenderTarget(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
|
@ -143,9 +145,10 @@ void Rasterizer::BeginRendering() {
|
|||
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
||||
texture_cache.IsMetaCleared(htile_address);
|
||||
const auto& hint = liverpool->last_db_extent;
|
||||
const auto& image_view = texture_cache.FindDepthTarget(
|
||||
regs.depth_buffer, regs.depth_view.NumSlices(), htile_address, hint,
|
||||
regs.depth_control.depth_write_enable);
|
||||
VideoCore::ImageInfo image_info{regs.depth_buffer, regs.depth_view.NumSlices(),
|
||||
htile_address, hint};
|
||||
VideoCore::ImageViewInfo view_info{regs.depth_buffer, regs.depth_view, regs.depth_control};
|
||||
const auto& image_view = texture_cache.FindDepthTarget(image_info, view_info);
|
||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
|
|
|
@ -117,18 +117,15 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
|
||||
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
||||
ASSERT(info.pixel_format != vk::Format::eUndefined);
|
||||
// Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
|
||||
// the texture cache should re-create the resource with the usage requested
|
||||
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
|
||||
vk::ImageCreateFlagBits::eExtendedUsage};
|
||||
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
||||
info.size.width == info.size.height) {
|
||||
if (info.props.is_cube) {
|
||||
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
|
||||
}
|
||||
if (info.type == vk::ImageType::e3D) {
|
||||
} else if (info.props.is_volume) {
|
||||
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
||||
}
|
||||
if (info.IsBlockCoded()) {
|
||||
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
|
||||
}
|
||||
|
||||
usage = ImageUsageFlags(info);
|
||||
|
||||
|
@ -157,15 +154,6 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
};
|
||||
|
||||
image.Create(image_ci);
|
||||
|
||||
// Create a special view for detiler
|
||||
if (info.is_tiled) {
|
||||
ImageViewInfo view_info;
|
||||
view_info.format = DemoteImageFormatForDetiling(info.pixel_format);
|
||||
view_for_detiler.emplace(*instance, view_info, *this, ImageId{});
|
||||
}
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
|
||||
}
|
||||
|
||||
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
|
||||
|
|
|
@ -105,7 +105,6 @@ struct Image {
|
|||
VAddr cpu_addr_end = 0;
|
||||
std::vector<ImageViewInfo> image_view_infos;
|
||||
std::vector<ImageViewId> image_view_ids;
|
||||
std::optional<ImageView> view_for_detiler;
|
||||
|
||||
// Resource state tracking
|
||||
vk::ImageUsageFlags usage;
|
||||
|
|
|
@ -47,33 +47,33 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
|
|||
// clang-format off
|
||||
// Table of macro tile parameters for a given tiling index (row) and bpp (column)
|
||||
static constexpr std::array macro_tile_extents{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0B
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 10
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 11
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{128u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
|
@ -82,62 +82,65 @@ static constexpr auto hw_pipe_interleave = 256u;
|
|||
|
||||
// Looks up one entry of the flat macro_tile_extents table for a tiling index and a
// bits-per-pixel value and returns it as a pair (callers bind it as pitch/height alignment).
// Only num_samples == 1 is accepted (asserted). No range check on tiling_idx or bpp is
// performed here, so an out-of-range combination indexes outside the intended row.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). `row` and `column` are each declared twice; the first pair (4 entries per
// row) is presumably the superseded code and the second pair (5 entries per row) its
// replacement - confirm against the real file.
static constexpr std::pair<u32, u32> GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) {
|
||||
ASSERT(num_samples == 1);
|
||||
const auto row = tiling_idx * 4;
|
||||
const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64
|
||||
const auto row = tiling_idx * 5;
|
||||
// bit_width(8) == 4, so bpp 8/16/32/64/128 map to columns 0/1/2/3/4.
const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128
|
||||
return macro_tile_extents[row + column];
|
||||
}
|
||||
|
||||
// Computes the padded pitch and byte size of a linearly laid out surface.
// The pitch is rounded up to pitch_align = max(8, 64 / bytes-per-element), where
// bytes-per-element is bpp rounded up to whole bytes. It is then grown by pitch_align until
// pitch_aligned * height * num_samples is a multiple of slice_align. Height is not padded.
// The pair-returning form yields {pitch_aligned, size in bytes}; the size_t form yields
// only the size in bytes.
// NOTE(review): the `& ~(pitch_align - 1)` rounding is only exact when pitch_align is a
// power of two - confirm that holds for every bpp passed in.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). The signature, `log_sz`, `slice_align`, the loop body assignment and the
// return statement each appear twice; the first occurrence is presumably the superseded
// line and the second its replacement - confirm against the real file.
static constexpr size_t ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, u32 num_samples) {
|
||||
static constexpr std::pair<u32, size_t> ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp,
|
||||
u32 num_samples) {
|
||||
const auto pitch_align = std::max(8u, 64u / ((bpp + 7) / 8));
|
||||
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
|
||||
const auto height_aligned = height;
|
||||
size_t log_sz = 1;
|
||||
// Without inner parentheses this parses as (hw_pipe_interleave / (bpp + 7)) / 8, which is
// not the same value as the parenthesised expression two statements below.
const auto slice_align = std::max(64u, hw_pipe_interleave / (bpp + 7) / 8);
|
||||
size_t log_sz = pitch_aligned * height_aligned * num_samples;
|
||||
const auto slice_align = std::max(64u, 256u / ((bpp + 7) / 8));
|
||||
// Widen the pitch until the element count of one slice is a multiple of slice_align.
while (log_sz % slice_align) {
|
||||
log_sz = pitch_aligned * height_aligned * num_samples;
|
||||
pitch_aligned += pitch_align;
|
||||
log_sz = pitch_aligned * height_aligned * num_samples;
|
||||
}
|
||||
// log_sz counts elements; convert to bytes, rounding up.
return (log_sz * bpp + 7) / 8;
|
||||
return {pitch_aligned, (log_sz * bpp + 7) / 8};
|
||||
}
|
||||
|
||||
// Computes the padded pitch and byte size of a micro-tiled surface.
// Pitch and height are rounded up to the two components of micro_tile_extent. The pitch is
// then grown in steps of 8 until the byte size is a multiple of 256.
// The pair-returning form yields {pitch_aligned, size in bytes}; the size_t form yields
// only the size in bytes.
// NOTE(review): the size expression multiplies pitch, height, bpp and num_samples before
// the result is stored in a size_t; if those operands are all 32-bit the product could
// wrap for large surfaces - confirm the operand types.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). The signature, `log_sz`, the loop body assignment and the return statement
// each appear twice; the first occurrence is presumably the superseded line and the second
// its replacement - confirm against the real file.
static constexpr size_t ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples) {
|
||||
static constexpr std::pair<u32, size_t> ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp,
|
||||
u32 num_samples) {
|
||||
const auto& [pitch_align, height_align] = micro_tile_extent;
|
||||
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
|
||||
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
|
||||
size_t log_sz = 1;
|
||||
// Here log_sz is already a byte count (bits rounded up to whole bytes).
size_t log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
|
||||
// Widen the pitch until the byte size is a multiple of 256.
while (log_sz % 256) {
|
||||
log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
|
||||
pitch_aligned += 8;
|
||||
log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
|
||||
}
|
||||
return log_sz;
|
||||
return {pitch_aligned, log_sz};
|
||||
}
|
||||
|
||||
// Computes the padded pitch and byte size of a macro-tiled surface.
// The alignment pair comes from GetMacroTileExtents(tiling_idx, bpp, num_samples) and must be
// non-zero in both components (asserted). Pitch and height are rounded up to it; no further
// padding loop is applied.
// The pair-returning form yields {pitch_aligned, size in bytes}; the size_t form yields
// only the size in bytes.
// NOTE(review): this listing looks like a rendered diff (the bare `|` / `||||` rows are
// separators). The signature and the return statement each appear twice; the first
// occurrence is presumably the superseded line and the second its replacement - confirm
// against the real file.
static constexpr size_t ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples,
|
||||
u32 tiling_idx) {
|
||||
static constexpr std::pair<u32, size_t> ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp,
|
||||
u32 num_samples, u32 tiling_idx) {
|
||||
const auto& [pitch_align, height_align] = GetMacroTileExtents(tiling_idx, bpp, num_samples);
|
||||
ASSERT(pitch_align != 0 && height_align != 0);
|
||||
const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
|
||||
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
|
||||
return (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
|
||||
// Element count first, then converted to bytes with rounding up.
const auto log_sz = pitch_aligned * height_aligned * num_samples;
|
||||
return {pitch_aligned, (log_sz * bpp + 7) / 8};
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
|
||||
VAddr cpu_address) noexcept {
|
||||
const auto& attrib = group.attrib;
|
||||
is_tiled = attrib.tiling_mode == TilingMode::Tile;
|
||||
tiling_mode =
|
||||
is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear;
|
||||
props.is_tiled = attrib.tiling_mode == TilingMode::Tile;
|
||||
tiling_mode = props.is_tiled ? AmdGpu::TilingMode::Display_MacroTiled
|
||||
: AmdGpu::TilingMode::Display_Linear;
|
||||
pixel_format = ConvertPixelFormat(attrib.pixel_format);
|
||||
type = vk::ImageType::e2D;
|
||||
size.width = attrib.width;
|
||||
size.height = attrib.height;
|
||||
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127);
|
||||
usage.vo_buffer = true;
|
||||
const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float;
|
||||
ASSERT(is_32bpp);
|
||||
num_bits = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float ? 32 : 64;
|
||||
ASSERT(num_bits == 32);
|
||||
|
||||
guest_address = cpu_address;
|
||||
if (!is_tiled) {
|
||||
if (!props.is_tiled) {
|
||||
guest_size_bytes = pitch * size.height * 4;
|
||||
} else {
|
||||
if (Config::isNeoMode()) {
|
||||
|
@ -146,15 +149,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
|
|||
guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4;
|
||||
}
|
||||
}
|
||||
mips_layout.emplace_back(0, guest_size_bytes);
|
||||
mips_layout.emplace_back(guest_size_bytes, pitch, 0);
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
|
||||
is_tiled = buffer.IsTiled();
|
||||
props.is_tiled = buffer.IsTiled();
|
||||
tiling_mode = buffer.GetTilingMode();
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
|
||||
num_samples = 1 << buffer.attrib.num_fragments_log2;
|
||||
num_bits = NumBits(buffer.info.format);
|
||||
type = vk::ImageType::e2D;
|
||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||
|
@ -168,15 +172,16 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
|||
guest_address = buffer.Address();
|
||||
const auto color_slice_sz = buffer.GetColorSliceSize();
|
||||
guest_size_bytes = color_slice_sz * buffer.NumSlices();
|
||||
mips_layout.emplace_back(0, color_slice_sz);
|
||||
mips_layout.emplace_back(color_slice_sz, pitch, 0);
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
|
||||
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
|
||||
is_tiled = false;
|
||||
props.is_tiled = false;
|
||||
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
|
||||
type = vk::ImageType::e2D;
|
||||
num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2
|
||||
num_bits = buffer.NumBits();
|
||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||
size.depth = 1;
|
||||
|
@ -188,37 +193,38 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
|
|||
guest_address = buffer.Address();
|
||||
const auto depth_slice_sz = buffer.GetDepthSliceSize();
|
||||
guest_size_bytes = depth_slice_sz * num_slices;
|
||||
mips_layout.emplace_back(0, depth_slice_sz);
|
||||
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
||||
is_tiled = image.IsTiled();
|
||||
tiling_mode = image.GetTilingMode();
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||
type = ConvertImageType(image.GetType());
|
||||
is_cube = image.GetType() == AmdGpu::ImageType::Cube;
|
||||
is_volume = image.GetType() == AmdGpu::ImageType::Color3D;
|
||||
props.is_tiled = image.IsTiled();
|
||||
props.is_cube = image.GetType() == AmdGpu::ImageType::Cube;
|
||||
props.is_volume = image.GetType() == AmdGpu::ImageType::Color3D;
|
||||
props.is_pow2 = image.pow2pad;
|
||||
props.is_block = IsBlockCoded();
|
||||
size.width = image.width + 1;
|
||||
size.height = image.height + 1;
|
||||
size.depth = is_volume ? image.depth + 1 : 1;
|
||||
size.depth = props.is_volume ? image.depth + 1 : 1;
|
||||
pitch = image.Pitch();
|
||||
resources.levels = image.NumLevels();
|
||||
resources.layers = image.NumLayers();
|
||||
num_bits = NumBits(image.GetDataFmt());
|
||||
usage.texture = true;
|
||||
|
||||
guest_address = image.Address();
|
||||
|
||||
mips_layout.reserve(resources.levels);
|
||||
const auto num_bits = NumBits(image.GetDataFmt());
|
||||
const auto is_block = IsBlockCoded();
|
||||
const auto is_pow2 = image.pow2pad;
|
||||
|
||||
MipInfo mip_info{};
|
||||
guest_size_bytes = 0;
|
||||
for (auto mip = 0u; mip < resources.levels; ++mip) {
|
||||
auto bpp = num_bits;
|
||||
auto mip_w = pitch >> mip;
|
||||
auto mip_h = size.height >> mip;
|
||||
if (is_block) {
|
||||
if (props.is_block) {
|
||||
mip_w = (mip_w + 3) / 4;
|
||||
mip_h = (mip_h + 3) / 4;
|
||||
bpp *= 16;
|
||||
|
@@ -227,40 +233,48 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
|||
mip_h = std::max(mip_h, 1u);
|
||||
auto mip_d = std::max(size.depth >> mip, 1u);
|
||||
|
||||
if (is_pow2) {
|
||||
if (props.is_pow2) {
|
||||
mip_w = std::bit_ceil(mip_w);
|
||||
mip_h = std::bit_ceil(mip_h);
|
||||
mip_d = std::bit_ceil(mip_d);
|
||||
}
|
||||
|
||||
size_t mip_size = 0;
|
||||
switch (tiling_mode) {
|
||||
case AmdGpu::TilingMode::Display_Linear: {
|
||||
ASSERT(!is_cube);
|
||||
mip_size = ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples);
|
||||
ASSERT(!props.is_cube);
|
||||
std::tie(mip_info.pitch, mip_info.size) =
|
||||
ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples);
|
||||
mip_info.height = mip_h;
|
||||
break;
|
||||
}
|
||||
case AmdGpu::TilingMode::Texture_MicroTiled: {
|
||||
mip_size = ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples);
|
||||
std::tie(mip_info.pitch, mip_info.size) =
|
||||
ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples);
|
||||
mip_info.height = std::max(mip_h, 8u);
|
||||
if (props.is_block) {
|
||||
mip_info.pitch = std::max(mip_info.pitch * 4, 32u);
|
||||
mip_info.height = std::max(mip_info.height * 4, 32u);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case AmdGpu::TilingMode::Display_MacroTiled:
|
||||
case AmdGpu::TilingMode::Texture_MacroTiled:
|
||||
case AmdGpu::TilingMode::Depth_MacroTiled: {
|
||||
ASSERT(!is_cube && !is_block);
|
||||
ASSERT(!props.is_cube && !props.is_block);
|
||||
ASSERT(num_samples == 1);
|
||||
ASSERT(num_bits <= 64);
|
||||
mip_size = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);
|
||||
std::tie(mip_info.pitch, mip_info.size) =
|
||||
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
mip_size *= mip_d;
|
||||
mip_info.size *= mip_d;
|
||||
|
||||
mips_layout.emplace_back(guest_size_bytes, mip_size);
|
||||
guest_size_bytes += mip_size;
|
||||
mip_info.offset = guest_size_bytes;
|
||||
mips_layout.emplace_back(mip_info);
|
||||
guest_size_bytes += mip_info.size;
|
||||
}
|
||||
guest_size_bytes *= resources.layers;
|
||||
}
|
||||
|
|
|
@@ -9,6 +9,8 @@
|
|||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
struct ImageInfo {
|
||||
|
@@ -42,18 +44,29 @@ struct ImageInfo {
|
|||
u32 vo_buffer : 1;
|
||||
} usage{}; // Usage data tracked during image lifetime
|
||||
|
||||
bool is_cube = false;
|
||||
bool is_volume = false;
|
||||
bool is_tiled = false;
|
||||
bool is_read_only = false;
|
||||
struct {
|
||||
u32 is_cube : 1;
|
||||
u32 is_volume : 1;
|
||||
u32 is_tiled : 1;
|
||||
u32 is_pow2 : 1;
|
||||
u32 is_block : 1;
|
||||
} props{}; // Surface properties with impact on various calculation factors
|
||||
|
||||
vk::Format pixel_format = vk::Format::eUndefined;
|
||||
vk::ImageType type = vk::ImageType::e1D;
|
||||
SubresourceExtent resources;
|
||||
Extent3D size{1, 1, 1};
|
||||
u32 num_bits{};
|
||||
u32 num_samples = 1;
|
||||
u32 pitch = 0;
|
||||
AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear};
|
||||
std::vector<std::pair<u32, u32>> mips_layout;
|
||||
struct MipInfo {
|
||||
u32 size;
|
||||
u32 pitch;
|
||||
u32 height;
|
||||
u32 offset;
|
||||
};
|
||||
boost::container::small_vector<MipInfo, 14> mips_layout;
|
||||
VAddr guest_address{0};
|
||||
u32 guest_size_bytes{0};
|
||||
};
|
||||
|
|
|
@@ -1,6 +1,7 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/texture_cache/image.h"
|
||||
|
@@ -50,15 +51,18 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce
|
|||
: is_storage{is_storage} {
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||
range.base.level = static_cast<u32>(image.base_level);
|
||||
range.base.layer = static_cast<u32>(image.base_array);
|
||||
range.extent.levels = image.NumLevels();
|
||||
range.extent.layers = image.NumLayers();
|
||||
if (!is_storage) {
|
||||
range.base.level = image.base_level;
|
||||
range.base.layer = image.base_array;
|
||||
range.extent.levels = image.last_level + 1;
|
||||
range.extent.layers = image.last_array + 1;
|
||||
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
|
||||
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
|
||||
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
|
||||
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
||||
// Check for unfortunate case of storage images being swizzled
|
||||
if (is_storage && (mapping != vk::ComponentMapping{})) {
|
||||
LOG_ERROR(Render_Vulkan, "Storage image requires swizzling");
|
||||
mapping = vk::ComponentMapping{};
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -70,6 +74,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
|
|||
base_format, col_buffer.info.comp_swap.Value(), is_vo_surface);
|
||||
}
|
||||
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
|
||||
AmdGpu::Liverpool::DepthView view,
|
||||
AmdGpu::Liverpool::DepthControl ctl) {
|
||||
format = Vulkan::LiverpoolToVK::DepthFormat(depth_buffer.z_info.format,
|
||||
depth_buffer.stencil_info.format);
|
||||
is_storage = ctl.depth_write_enable;
|
||||
range.base.layer = view.slice_start;
|
||||
range.extent.layers = view.NumSlices();
|
||||
}
|
||||
|
||||
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
|
||||
ImageId image_id_, std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
|
||||
: info{info_}, image_id{image_id_} {
|
||||
|
@@ -93,10 +107,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
|||
.components = instance.GetSupportedComponentSwizzle(format, info.mapping),
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect,
|
||||
.baseMipLevel = 0U,
|
||||
.levelCount = 1,
|
||||
.baseMipLevel = info.range.base.level,
|
||||
.levelCount = info.range.extent.levels - info.range.base.level,
|
||||
.baseArrayLayer = info_.range.base.layer,
|
||||
.layerCount = image.info.IsBlockCoded() ? 1 : VK_REMAINING_ARRAY_LAYERS,
|
||||
.layerCount = info.range.extent.layers - info.range.base.layer,
|
||||
},
|
||||
};
|
||||
image_view = instance.GetDevice().createImageViewUnique(image_view_ci);
|
||||
|
|
|
@@ -18,10 +18,11 @@ class Scheduler;
|
|||
namespace VideoCore {
|
||||
|
||||
struct ImageViewInfo {
|
||||
explicit ImageViewInfo() = default;
|
||||
explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept;
|
||||
explicit ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
|
||||
bool is_vo_surface) noexcept;
|
||||
ImageViewInfo() = default;
|
||||
ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept;
|
||||
ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept;
|
||||
ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
|
||||
AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl);
|
||||
|
||||
vk::ImageViewType type = vk::ImageViewType::e2D;
|
||||
vk::Format format = vk::Format::eR8G8B8A8Unorm;
|
||||
|
|
|
@@ -152,8 +152,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
|
|||
image_id = image_ids[0];
|
||||
}
|
||||
|
||||
RegisterMeta(info, image_id);
|
||||
|
||||
Image& image = slot_images[image_id];
|
||||
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
|
||||
RefreshImage(image);
|
||||
|
@@ -184,13 +182,12 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
|||
return slot_image_views[view_id];
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage) {
|
||||
const ImageInfo info{desc};
|
||||
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(info);
|
||||
Image& image = slot_images[image_id];
|
||||
auto& usage = image.info.usage;
|
||||
|
||||
if (is_storage) {
|
||||
if (view_info.is_storage) {
|
||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
||||
usage.storage = true;
|
||||
} else {
|
||||
|
@@ -201,14 +198,36 @@ ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage)
|
|||
usage.texture = true;
|
||||
}
|
||||
|
||||
const ImageViewInfo view_info{desc, is_storage};
|
||||
return RegisterImageView(image_id, view_info);
|
||||
// These changes are temporary and should be removed once texture cache will handle subresources
|
||||
// merging
|
||||
auto view_info_tmp = view_info;
|
||||
if (view_info_tmp.range.base.level > image.info.resources.levels - 1 ||
|
||||
view_info_tmp.range.base.layer > image.info.resources.layers - 1 ||
|
||||
view_info_tmp.range.extent.levels > image.info.resources.levels ||
|
||||
view_info_tmp.range.extent.layers > image.info.resources.layers) {
|
||||
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})",
|
||||
view_info_tmp.range.base.level, view_info_tmp.range.extent.levels,
|
||||
view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers,
|
||||
image.info.resources.levels, image.info.resources.layers);
|
||||
|
||||
view_info_tmp.range.base.level =
|
||||
std::min(view_info_tmp.range.base.level, image.info.resources.levels - 1);
|
||||
view_info_tmp.range.base.layer =
|
||||
std::min(view_info_tmp.range.base.layer, image.info.resources.layers - 1);
|
||||
view_info_tmp.range.extent.levels =
|
||||
std::min(view_info_tmp.range.extent.levels, image.info.resources.levels);
|
||||
view_info_tmp.range.extent.layers =
|
||||
std::min(view_info_tmp.range.extent.layers, image.info.resources.layers);
|
||||
}
|
||||
|
||||
return RegisterImageView(image_id, view_info_tmp);
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint) {
|
||||
const ImageInfo info{buffer, hint};
|
||||
const ImageId image_id = FindImage(info);
|
||||
ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(image_info);
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
|
@@ -216,30 +235,56 @@ ImageView& TextureCache::FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer&
|
|||
vk::AccessFlagBits::eColorAttachmentWrite |
|
||||
vk::AccessFlagBits::eColorAttachmentRead);
|
||||
|
||||
// Register meta data for this color buffer
|
||||
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
||||
if (image_info.meta_info.cmask_addr) {
|
||||
surface_metas.emplace(
|
||||
image_info.meta_info.cmask_addr,
|
||||
MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true});
|
||||
image.info.meta_info.cmask_addr = image_info.meta_info.cmask_addr;
|
||||
image.flags |= ImageFlagBits::MetaRegistered;
|
||||
}
|
||||
|
||||
if (image_info.meta_info.fmask_addr) {
|
||||
surface_metas.emplace(
|
||||
image_info.meta_info.fmask_addr,
|
||||
MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true});
|
||||
image.info.meta_info.fmask_addr = image_info.meta_info.fmask_addr;
|
||||
image.flags |= ImageFlagBits::MetaRegistered;
|
||||
}
|
||||
}
|
||||
|
||||
// Update tracked image usage
|
||||
image.info.usage.render_target = true;
|
||||
|
||||
ImageViewInfo view_info{buffer, !!image.info.usage.vo_buffer};
|
||||
return RegisterImageView(image_id, view_info);
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
|
||||
u32 num_slices, VAddr htile_address,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint,
|
||||
bool write_enabled) {
|
||||
const ImageInfo info{buffer, num_slices, htile_address, hint};
|
||||
const ImageId image_id = FindImage(info, false);
|
||||
ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(image_info, false);
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
const auto new_layout = write_enabled ? vk::ImageLayout::eDepthStencilAttachmentOptimal
|
||||
const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal
|
||||
: vk::ImageLayout::eDepthStencilReadOnlyOptimal;
|
||||
image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentRead);
|
||||
|
||||
// Register meta data for this depth buffer
|
||||
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
||||
if (image_info.meta_info.htile_addr) {
|
||||
surface_metas.emplace(
|
||||
image_info.meta_info.htile_addr,
|
||||
MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true});
|
||||
image.info.meta_info.htile_addr = image_info.meta_info.htile_addr;
|
||||
image.flags |= ImageFlagBits::MetaRegistered;
|
||||
}
|
||||
}
|
||||
|
||||
// Update tracked image usage
|
||||
image.info.usage.depth_target = true;
|
||||
|
||||
ImageViewInfo view_info;
|
||||
view_info.format = info.pixel_format;
|
||||
return RegisterImageView(image_id, view_info);
|
||||
}
|
||||
|
||||
|
@@ -247,64 +292,56 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
// Mark image as validated.
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
{
|
||||
if (!tile_manager.TryDetile(image)) {
|
||||
// Upload data to the staging buffer.
|
||||
const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4);
|
||||
// Copy to the image.
|
||||
image.Upload(staging.Handle(), offset);
|
||||
}
|
||||
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT(image.info.resources.levels == image.info.mips_layout.size());
|
||||
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
||||
for (u32 m = 0; m < image.info.resources.levels; m++) {
|
||||
const u32 width = std::max(image.info.size.width >> m, 1u);
|
||||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
const u32 depth = image.info.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
|
||||
const u32 map_size = image.info.mips_layout[m].second * image.info.resources.layers;
|
||||
|
||||
// Upload data to the staging buffer.
|
||||
const auto [data, offset, _] = staging.Map(map_size, 16);
|
||||
if (image.info.is_tiled) {
|
||||
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
|
||||
} else {
|
||||
std::memcpy(data,
|
||||
image_data + image.info.mips_layout[m].first * image.info.resources.layers,
|
||||
map_size);
|
||||
}
|
||||
staging.Commit(map_size);
|
||||
|
||||
// Copy to the image.
|
||||
const vk::BufferImageCopy image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = 0,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = m,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = u32(image.info.resources.layers),
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {width, height, depth},
|
||||
};
|
||||
|
||||
scheduler.EndRendering();
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
|
||||
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
|
||||
vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
vk::Buffer buffer{staging.Handle()};
|
||||
u32 offset{0};
|
||||
|
||||
auto upload_buffer = tile_manager.TryDetile(image);
|
||||
if (upload_buffer) {
|
||||
buffer = *upload_buffer;
|
||||
} else {
|
||||
// Upload data to the staging buffer.
|
||||
const auto [data, offset_, _] = staging.Map(image.info.guest_size_bytes, 16);
|
||||
std::memcpy(data, (void*)image.info.guest_address, image.info.guest_size_bytes);
|
||||
staging.Commit(image.info.guest_size_bytes);
|
||||
offset = offset_;
|
||||
}
|
||||
|
||||
const auto& num_layers = image.info.resources.layers;
|
||||
const auto& num_mips = image.info.resources.levels;
|
||||
ASSERT(num_mips == image.info.mips_layout.size());
|
||||
|
||||
boost::container::small_vector<vk::BufferImageCopy, 14> image_copy{};
|
||||
for (u32 m = 0; m < num_mips; m++) {
|
||||
const u32 width = std::max(image.info.size.width >> m, 1u);
|
||||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
const u32 depth =
|
||||
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
|
||||
const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
|
||||
|
||||
image_copy.push_back({
|
||||
.bufferOffset = offset + mip_ofs * num_layers,
|
||||
.bufferRowLength = static_cast<uint32_t>(mip_pitch),
|
||||
.bufferImageHeight = static_cast<uint32_t>(mip_height),
|
||||
.imageSubresource{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = m,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = num_layers,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {width, height, depth},
|
||||
});
|
||||
}
|
||||
|
||||
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||
}
|
||||
}
|
||||
|
||||
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
||||
|
@@ -320,47 +357,8 @@ void TextureCache::RegisterImage(ImageId image_id) {
|
|||
image.flags |= ImageFlagBits::Registered;
|
||||
ForEachPage(image.cpu_addr, image.info.guest_size_bytes,
|
||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
||||
}
|
||||
|
||||
void TextureCache::RegisterMeta(const ImageInfo& info, ImageId image_id) {
|
||||
Image& image = slot_images[image_id];
|
||||
|
||||
if (image.flags & ImageFlagBits::MetaRegistered) {
|
||||
return;
|
||||
}
|
||||
|
||||
bool registered = true;
|
||||
// Current resource tracking implementation allows us to detect usage of meta only in the last
|
||||
// moment, so we likely will miss its first clear. To avoid this and make first frame, where
|
||||
// the meta is encountered, looks correct we set its state to "cleared" at registrations time.
|
||||
if (info.usage.render_target) {
|
||||
if (info.meta_info.cmask_addr) {
|
||||
surface_metas.emplace(
|
||||
info.meta_info.cmask_addr,
|
||||
MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true});
|
||||
image.info.meta_info.cmask_addr = info.meta_info.cmask_addr;
|
||||
}
|
||||
|
||||
if (info.meta_info.fmask_addr) {
|
||||
surface_metas.emplace(
|
||||
info.meta_info.fmask_addr,
|
||||
MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true});
|
||||
image.info.meta_info.fmask_addr = info.meta_info.fmask_addr;
|
||||
}
|
||||
} else if (info.usage.depth_target) {
|
||||
if (info.meta_info.htile_addr) {
|
||||
surface_metas.emplace(
|
||||
info.meta_info.htile_addr,
|
||||
MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true});
|
||||
image.info.meta_info.htile_addr = info.meta_info.htile_addr;
|
||||
}
|
||||
} else {
|
||||
registered = false;
|
||||
}
|
||||
|
||||
if (registered) {
|
||||
image.flags |= ImageFlagBits::MetaRegistered;
|
||||
}
|
||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
|
||||
}
|
||||
|
||||
void TextureCache::UnregisterImage(ImageId image_id) {
|
||||
|
|
|
@@ -51,17 +51,16 @@ public:
|
|||
[[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true);
|
||||
|
||||
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||
[[nodiscard]] ImageView& FindTexture(const AmdGpu::Image& image, bool is_storage);
|
||||
[[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info);
|
||||
|
||||
/// Retrieves the render target with specified properties
|
||||
[[nodiscard]] ImageView& FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint);
|
||||
[[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info);
|
||||
|
||||
/// Retrieves the depth target with specified properties
|
||||
[[nodiscard]] ImageView& FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
|
||||
u32 num_slices, VAddr htile_address,
|
||||
const AmdGpu::Liverpool::CbDbExtent& hint,
|
||||
bool write_enabled);
|
||||
[[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info);
|
||||
|
||||
/// Reuploads image contents.
|
||||
void RefreshImage(Image& image);
|
||||
|
@@ -158,9 +157,6 @@ private:
|
|||
/// Register image in the page table
|
||||
void RegisterImage(ImageId image);
|
||||
|
||||
/// Register metadata surfaces attached to the image
|
||||
void RegisterMeta(const ImageInfo& info, ImageId image);
|
||||
|
||||
/// Unregister image from the page table
|
||||
void UnregisterImage(ImageId image);
|
||||
|
||||
|
|
|
@@ -16,6 +16,7 @@
|
|||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <magic_enum.hpp>
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
|
@@ -176,6 +177,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
return vk::Format::eR8Uint;
|
||||
case vk::Format::eR8G8Unorm:
|
||||
case vk::Format::eR16Sfloat:
|
||||
case vk::Format::eR16Unorm:
|
||||
return vk::Format::eR8G8Uint;
|
||||
case vk::Format::eR8G8B8A8Srgb:
|
||||
case vk::Format::eB8G8R8A8Srgb:
|
||||
|
@@ -183,10 +185,13 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
case vk::Format::eR8G8B8A8Unorm:
|
||||
case vk::Format::eR32Sfloat:
|
||||
case vk::Format::eR32Uint:
|
||||
case vk::Format::eR16G16Sfloat:
|
||||
return vk::Format::eR32Uint;
|
||||
case vk::Format::eBc1RgbaUnormBlock:
|
||||
case vk::Format::eBc4UnormBlock:
|
||||
case vk::Format::eR32G32Sfloat:
|
||||
case vk::Format::eR32G32Uint:
|
||||
case vk::Format::eR16G16B16A16Unorm:
|
||||
return vk::Format::eR32G32Uint;
|
||||
case vk::Format::eBc2SrgbBlock:
|
||||
case vk::Format::eBc2UnormBlock:
|
||||
|
@@ -225,14 +230,14 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eTransferDst |
|
||||
vk::BufferUsageFlagBits::eUniformBuffer |
|
||||
vk::BufferUsageFlagBits::eStorageBuffer;
|
||||
struct DetilerParams {
|
||||
u32 num_levels;
|
||||
u32 pitch0;
|
||||
u32 sizes[14];
|
||||
};
|
||||
|
||||
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
||||
: instance{instance}, scheduler{scheduler},
|
||||
staging{instance, scheduler, StagingFlags, 256_MB, Vulkan::BufferType::Upload} {
|
||||
|
||||
: instance{instance}, scheduler{scheduler} {
|
||||
static const std::array detiler_shaders{
|
||||
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
|
||||
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
|
||||
|
@@ -264,7 +269,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
|
|||
},
|
||||
{
|
||||
.binding = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageImage,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
},
|
||||
|
@@ -281,7 +286,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
|
|||
const vk::PushConstantRange push_constants = {
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
.offset = 0,
|
||||
.size = sizeof(u32),
|
||||
.size = sizeof(DetilerParams),
|
||||
};
|
||||
|
||||
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
||||
|
@@ -312,35 +317,88 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
|
|||
|
||||
TileManager::~TileManager() = default;
|
||||
|
||||
bool TileManager::TryDetile(Image& image) {
|
||||
if (!image.info.is_tiled) {
|
||||
return false;
|
||||
TileManager::ScratchBuffer TileManager::AllocBuffer(u32 size, bool is_storage /*= false*/) {
|
||||
const auto usage = vk::BufferUsageFlagBits::eStorageBuffer |
|
||||
(is_storage ? vk::BufferUsageFlagBits::eTransferSrc
|
||||
: vk::BufferUsageFlagBits::eTransferDst);
|
||||
const vk::BufferCreateInfo buffer_ci{
|
||||
.size = size,
|
||||
.usage = usage,
|
||||
};
|
||||
|
||||
const bool is_large_buffer = size > 128_MB;
|
||||
VmaAllocationCreateInfo alloc_info{
|
||||
.flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
|
||||
: static_cast<VmaAllocationCreateFlags>(0),
|
||||
.usage = is_large_buffer ? VMA_MEMORY_USAGE_AUTO_PREFER_HOST
|
||||
: VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
|
||||
.requiredFlags = !is_storage ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
||||
: static_cast<VkMemoryPropertyFlags>(0),
|
||||
};
|
||||
|
||||
VkBuffer buffer;
|
||||
VmaAllocation allocation;
|
||||
const auto buffer_ci_unsafe = static_cast<VkBufferCreateInfo>(buffer_ci);
|
||||
const auto result = vmaCreateBuffer(instance.GetAllocator(), &buffer_ci_unsafe, &alloc_info,
|
||||
&buffer, &allocation, nullptr);
|
||||
ASSERT(result == VK_SUCCESS);
|
||||
return {buffer, allocation};
|
||||
}
|
||||
|
||||
void TileManager::Upload(ScratchBuffer buffer, const void* data, size_t size) {
|
||||
VmaAllocationInfo alloc_info{};
|
||||
vmaGetAllocationInfo(instance.GetAllocator(), buffer.second, &alloc_info);
|
||||
ASSERT(size <= alloc_info.size);
|
||||
void* ptr{};
|
||||
const auto result = vmaMapMemory(instance.GetAllocator(), buffer.second, &ptr);
|
||||
ASSERT(result == VK_SUCCESS);
|
||||
std::memcpy(ptr, data, size);
|
||||
vmaUnmapMemory(instance.GetAllocator(), buffer.second);
|
||||
}
|
||||
|
||||
void TileManager::FreeBuffer(ScratchBuffer buffer) {
|
||||
vmaDestroyBuffer(instance.GetAllocator(), buffer.first, buffer.second);
|
||||
}
|
||||
|
||||
std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
|
||||
if (!image.info.props.is_tiled) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const auto* detiler = GetDetiler(image);
|
||||
if (!detiler) {
|
||||
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
|
||||
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
|
||||
return false;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const auto offset =
|
||||
staging.Copy(image.cpu_addr, image.info.guest_size_bytes, instance.StorageMinAlignment());
|
||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
|
||||
// Prepare input buffer
|
||||
auto in_buffer = AllocBuffer(image.info.guest_size_bytes);
|
||||
Upload(in_buffer, reinterpret_cast<const void*>(image.info.guest_address),
|
||||
image.info.guest_size_bytes);
|
||||
|
||||
// Prepare output buffer
|
||||
auto out_buffer = AllocBuffer(image.info.guest_size_bytes, true);
|
||||
|
||||
scheduler.DeferOperation([=, this]() {
|
||||
FreeBuffer(in_buffer);
|
||||
FreeBuffer(out_buffer);
|
||||
});
|
||||
|
||||
auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl);
|
||||
|
||||
const vk::DescriptorBufferInfo input_buffer_info{
|
||||
.buffer = staging.Handle(),
|
||||
.offset = offset,
|
||||
.buffer = in_buffer.first,
|
||||
.offset = 0,
|
||||
.range = image.info.guest_size_bytes,
|
||||
};
|
||||
|
||||
ASSERT(image.view_for_detiler.has_value());
|
||||
const vk::DescriptorImageInfo output_image_info{
|
||||
.imageView = *image.view_for_detiler->image_view,
|
||||
.imageLayout = image.layout,
|
||||
const vk::DescriptorBufferInfo output_buffer_info{
|
||||
.buffer = out_buffer.first,
|
||||
.offset = 0,
|
||||
.range = image.info.guest_size_bytes,
|
||||
};
|
||||
|
||||
std::vector<vk::WriteDescriptorSet> set_writes{
|
||||
|
@@ -357,20 +415,44 @@ bool TileManager::TryDetile(Image& image) {
|
|||
.dstBinding = 1,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageImage,
|
||||
.pImageInfo = &output_image_info,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &output_buffer_info,
|
||||
},
|
||||
};
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *detiler->pl_layout, 0,
|
||||
set_writes);
|
||||
|
||||
cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u,
|
||||
sizeof(image.info.pitch), &image.info.pitch);
|
||||
DetilerParams params;
|
||||
params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u);
|
||||
params.num_levels = image.info.resources.levels;
|
||||
|
||||
cmdbuf.dispatch((image.info.size.width * image.info.size.height) / 64, 1,
|
||||
1); // round to 64
|
||||
ASSERT(image.info.resources.levels <= 14);
|
||||
std::memset(&params.sizes, 0, sizeof(params.sizes));
|
||||
for (int m = 0; m < image.info.resources.levels; ++m) {
|
||||
params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers +
|
||||
(m > 0 ? params.sizes[m - 1] : 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
auto pitch = image.info.pitch;
|
||||
cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params),
|
||||
&params);
|
||||
|
||||
ASSERT((image.info.guest_size_bytes % 64) == 0);
|
||||
const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u);
|
||||
const auto num_tiles = image.info.guest_size_bytes / (64 * (bpp / 8));
|
||||
cmdbuf.dispatch(num_tiles, 1, 1);
|
||||
|
||||
const vk::BufferMemoryBarrier post_barrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.buffer = out_buffer.first,
|
||||
.size = image.info.guest_size_bytes,
|
||||
};
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
||||
{}, post_barrier, {});
|
||||
|
||||
return {out_buffer.first};
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@@ -34,10 +34,16 @@ struct DetilerContext {
|
|||
|
||||
class TileManager {
|
||||
public:
|
||||
using ScratchBuffer = std::pair<VkBuffer, VmaAllocation>;
|
||||
|
||||
TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
||||
~TileManager();
|
||||
|
||||
bool TryDetile(Image& image);
|
||||
std::optional<vk::Buffer> TryDetile(Image& image);
|
||||
|
||||
ScratchBuffer AllocBuffer(u32 size, bool is_storage = false);
|
||||
void Upload(ScratchBuffer buffer, const void* data, size_t size);
|
||||
void FreeBuffer(ScratchBuffer buffer);
|
||||
|
||||
private:
|
||||
const DetilerContext* GetDetiler(const Image& image) const;
|
||||
|
@@ -45,7 +51,6 @@ private:
|
|||
private:
|
||||
const Vulkan::Instance& instance;
|
||||
Vulkan::Scheduler& scheduler;
|
||||
Vulkan::StreamBuffer staging;
|
||||
std::array<DetilerContext, DetilerType::Max> detilers;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue