diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 8553bc92..db2ee91c 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -377,9 +377,13 @@ struct Liverpool { return 1u << z_info.num_samples; // spec doesn't say it is a log2 } + u32 NumBits() const { + return z_info.format == ZFormat::Z32Float ? 32 : 16; + } + size_t GetDepthSliceSize() const { ASSERT(z_info.format != ZFormat::Invalid); - const auto bpe = z_info.format == ZFormat::Z32Float ? 4 : 2; + const auto bpe = NumBits() >> 3; // in bytes return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples(); } }; diff --git a/src/video_core/host_shaders/detile_m32x1.comp b/src/video_core/host_shaders/detile_m32x1.comp index f3e84c75..fecea109 100644 --- a/src/video_core/host_shaders/detile_m32x1.comp +++ b/src/video_core/host_shaders/detile_m32x1.comp @@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(r32ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; // Inverse morton LUT, small enough to fit into K$ @@ -31,20 +35,22 @@ uint rmort[16] = { #define TEXELS_PER_ELEMENT (1) void main() { + uint tile_base = gl_GlobalInvocationID.x - gl_LocalInvocationID.x; // WG*16 + uint p0 = in_data[gl_GlobalInvocationID.x]; uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; uint col = bitfieldExtract(packed_pos, 4, 4); uint row = bitfieldExtract(packed_pos, 0, 4); - uint p0 = in_data[gl_GlobalInvocationID.x]; + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 
1 : 0; + } - uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM) + uint tiles_per_pitch = max((info.pitch >> mip) / MICRO_TILE_DIM, 1); uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch; uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch; - - uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col; - uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row; - - ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y); - imageStore(output_img, img_pos, uvec4(p0, 0, 0, 0)); -} \ No newline at end of file + uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col; + uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * MICRO_TILE_DIM; + out_data[dw_ofs_x + dw_ofs_y] = p0; +} diff --git a/src/video_core/host_shaders/detile_m32x2.comp b/src/video_core/host_shaders/detile_m32x2.comp index 2853f8b7..c2caa62c 100644 --- a/src/video_core/host_shaders/detile_m32x2.comp +++ b/src/video_core/host_shaders/detile_m32x2.comp @@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(rg32ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; // Inverse morton LUT, small enough to fit into K$ @@ -30,19 +34,25 @@ uint rmort[16] = { #define MICRO_TILE_DIM (8) void main() { + uint block_ofs = 2 * gl_GlobalInvocationID.x; + uint p0 = in_data[block_ofs + 0]; + uint p1 = in_data[block_ofs + 1]; + uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; uint col = bitfieldExtract(packed_pos, 4, 4); uint row = bitfieldExtract(packed_pos, 0, 4); - uint block_ofs = 2 * gl_GlobalInvocationID.x; - uint p0 = in_data[block_ofs + 0]; - uint p1 = in_data[block_ofs + 1]; + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 8) >= info.sizes[m] ? 
1 : 0; + } - uint tiles_per_pitch = (info.pitch >> 3) >> 2; // log2(MICRO_TILE_DIM) / 4 - ivec2 img_pos = MICRO_TILE_DIM * ivec2( - gl_WorkGroupID.x % tiles_per_pitch, - gl_WorkGroupID.x / tiles_per_pitch - ); - imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, 0, 0)); -} \ No newline at end of file + uint tiles_per_pitch = max((info.pitch >> mip) / MICRO_TILE_DIM, 1) * 2; + uint target_tile_x = 2 * gl_WorkGroupID.x % tiles_per_pitch; + uint target_tile_y = 2 * gl_WorkGroupID.x / tiles_per_pitch; + uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col * 2; + uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * MICRO_TILE_DIM; + out_data[dw_ofs_x + dw_ofs_y] = p0; + out_data[dw_ofs_x + dw_ofs_y + 1] = p1; +} diff --git a/src/video_core/host_shaders/detile_m32x4.comp b/src/video_core/host_shaders/detile_m32x4.comp index 64f34e6f..11353870 100644 --- a/src/video_core/host_shaders/detile_m32x4.comp +++ b/src/video_core/host_shaders/detile_m32x4.comp @@ -8,10 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(rgba32ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; // Inverse morton LUT, small enough to fit into K$ @@ -30,21 +34,29 @@ uint rmort[16] = { #define MICRO_TILE_DIM (8) void main() { - uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); - uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; - uint col = bitfieldExtract(packed_pos, 4, 4); - uint row = bitfieldExtract(packed_pos, 0, 4); - uint block_ofs = 4 * gl_GlobalInvocationID.x; uint p0 = in_data[block_ofs + 0]; uint p1 = in_data[block_ofs + 1]; uint p2 = in_data[block_ofs + 2]; uint p3 = in_data[block_ofs + 3]; - uint tiles_per_pitch = (info.pitch >> 3) >> 2; // log2(MICRO_TILE_DIM) / 4 - ivec2 img_pos = MICRO_TILE_DIM * ivec2( - gl_WorkGroupID.x % tiles_per_pitch, - gl_WorkGroupID.x / tiles_per_pitch - ); - imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, p2, p3)); -} \ No newline at end of file + uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); + uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; + uint col = bitfieldExtract(packed_pos, 4, 4); + uint row = bitfieldExtract(packed_pos, 0, 4); + + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 16) >= info.sizes[m] ? 
1 : 0; + } + + uint tiles_per_pitch = max(((info.pitch >> mip) / MICRO_TILE_DIM), 1u) * 4; + uint target_tile_x = 4 * gl_WorkGroupID.x % tiles_per_pitch; + uint target_tile_y = 4 * gl_WorkGroupID.x / tiles_per_pitch; + uint dw_ofs_x = (target_tile_x * MICRO_TILE_DIM) + 4 * col; + uint dw_ofs_y = ((target_tile_y * tiles_per_pitch) * 64u) + ((row * tiles_per_pitch) * MICRO_TILE_DIM); + out_data[dw_ofs_x + dw_ofs_y] = p0; + out_data[dw_ofs_x + dw_ofs_y + 1] = p1; + out_data[dw_ofs_x + dw_ofs_y + 2] = p2; + out_data[dw_ofs_x + dw_ofs_y + 3] = p3; +} diff --git a/src/video_core/host_shaders/detile_m8x1.comp b/src/video_core/host_shaders/detile_m8x1.comp index b4d920e6..5ec48fae 100644 --- a/src/video_core/host_shaders/detile_m8x1.comp +++ b/src/video_core/host_shaders/detile_m8x1.comp @@ -11,10 +11,14 @@ layout (local_size_x = 16, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(r8ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; #define MICRO_TILE_DIM 8 @@ -32,17 +36,15 @@ void main() { uint row = (gl_LocalInvocationID.x % TEXELS_PER_ELEMENT) + TEXELS_PER_ELEMENT * (gl_LocalInvocationID.x >> 3); - uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM) + uint mip = 0; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0; + } + + uint tiles_per_pitch = max((info.pitch >> mip) / 8, 1); uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch; uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch; - uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col; - uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row; - - ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y); - - #pragma unroll - for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) { - imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(dst_tx & 0xff)); - dst_tx >>= 8; - } + uint dw_ofs_x = target_tile_x * 2 + col; // 2 = uints + uint dw_ofs_y = (target_tile_y * MICRO_TILE_DIM + row) * tiles_per_pitch * 2; // 2 = uints + out_data[dw_ofs_x + dw_ofs_y] = dst_tx; } \ No newline at end of file diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp index 1cebc12b..d27bc6e2 100644 --- a/src/video_core/host_shaders/detile_m8x2.comp +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -10,10 +10,14 @@ layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; layout(std430, binding = 0) buffer input_buf { uint in_data[]; }; -layout(rg8ui, binding = 1) uniform writeonly uimage2D output_img; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; layout(push_constant) uniform image_info { + uint num_levels; uint pitch; + uint sizes[14]; } info; #define MICRO_TILE_DIM 8 @@ -44,18 +48,14 @@ void main() { uint col = bitfieldExtract(packed_pos, 4, 4); uint row = bitfieldExtract(packed_pos, 0, 4); - uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM) + uint mip = 0u; + for (int m = 0; m < info.num_levels; ++m) { + mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 
1 : 0; + } + uint tiles_per_pitch = max(((info.pitch >> mip) / 8u), 1u); uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch; uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch; - uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col; - uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row; - - ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y); - - #pragma unroll - for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) { - uint p0 = (p[ofs] >> 8) & 0xff; - uint p1 = p[ofs] & 0xff; - imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p1, p0, 0, 0)); - } + uint dw_ofs_x = target_tile_x * 8 + col; + uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * 8; + out_data[(dw_ofs_x + dw_ofs_y) / 2] = src_tx; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 90d97404..fc7943e6 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -392,6 +392,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Float) { return vk::Format::eR16G16Sfloat; } + if (data_format == AmdGpu::DataFormat::Format16_16 && + num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR16G16Unorm; + } if (data_format == AmdGpu::DataFormat::Format10_11_11 && num_format == AmdGpu::NumberFormat::Float) { return vk::Format::eB10G11R11UfloatPack32; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 954adf44..51bb7f83 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -128,7 +128,9 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s for (const auto& image_desc : info.images) { const auto tsharp = info.ReadUd(image_desc.sgpr_base, image_desc.dword_offset); - const auto& image_view = texture_cache.FindTexture(tsharp, image_desc.is_storage); + VideoCore::ImageInfo image_info{tsharp}; + VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; + const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f119bc77..eb552268 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -366,7 +366,9 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& for (const auto& image_desc : stage.images) { const auto& tsharp = tsharps.emplace_back( stage.ReadUd(image_desc.sgpr_base, image_desc.dword_offset)); - const auto& image_view = texture_cache.FindTexture(tsharp, image_desc.is_storage); + VideoCore::ImageInfo image_info{tsharp}; + VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; + const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7f0b74ab..67994485 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -191,7 +191,7 @@ void PipelineCache::RefreshGraphicsKey() { LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf); key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( - base_format, col_buf.info.comp_swap.Value(), is_vo_surface); + base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); key.blend_controls[remapped_cb] = regs.blend_control[cb]; key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 1499d877..0915514b 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -32,6 +32,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( switch (static_cast<u32>(callback_data->messageIdNumber)) { case 0x609a13b: // Vertex attribute at location not consumed by shader case 0xc81ad50e: + case 0x92d66fc1: // `pMultisampleState is NULL` for depth-only passes (confirmed VL error) return VK_FALSE; default: break; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 67a88c47..c64f6089 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -120,7 +120,9 @@ void Rasterizer::BeginRendering() { } const auto& hint = liverpool->last_cb_extent[col_buf_id]; - const auto& image_view = texture_cache.FindRenderTarget(col_buf, hint); + VideoCore::ImageInfo image_info{col_buf, hint}; + VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/}; + const auto& image_view = texture_cache.FindRenderTarget(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); @@ -143,9 +145,10 @@ void Rasterizer::BeginRendering() { const bool is_clear = regs.depth_render_control.depth_clear_enable || texture_cache.IsMetaCleared(htile_address); const auto& hint = liverpool->last_db_extent; - const auto& image_view = texture_cache.FindDepthTarget( - regs.depth_buffer, regs.depth_view.NumSlices(), htile_address, hint, - regs.depth_control.depth_write_enable); + VideoCore::ImageInfo image_info{regs.depth_buffer, regs.depth_view.NumSlices(), + htile_address, hint}; + VideoCore::ImageViewInfo view_info{regs.depth_buffer, regs.depth_view, regs.depth_control}; + const auto& image_view = texture_cache.FindDepthTarget(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index b4b3f48a..f7aef847 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -117,18 +117,15 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { ASSERT(info.pixel_format != vk::Format::eUndefined); + // Here we force `eExtendedUsage` as we don't know 
all image usage cases beforehand. In normal case + // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 && - info.size.width == info.size.height) { + if (info.props.is_cube) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; - } - if (info.type == vk::ImageType::e3D) { + } else if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } - if (info.IsBlockCoded()) { - flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible; - } usage = ImageUsageFlags(info); @@ -157,15 +154,6 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, }; image.Create(image_ci); - - // Create a special view for detiler - if (info.is_tiled) { - ImageViewInfo view_info; - view_info.format = DemoteImageFormatForDetiling(info.pixel_format); - view_for_detiler.emplace(*instance, view_info, *this, ImageId{}); - } - - Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone); } void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 97ceaa09..b18f1002 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -105,7 +105,6 @@ struct Image { VAddr cpu_addr_end = 0; std::vector image_view_infos; std::vector image_view_ids; - std::optional view_for_detiler; // Resource state tracking vk::ImageUsageFlags usage; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 41ad0938..e01a61ae 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -47,33 +47,33 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { // clang-format off // The table of macro tiles parameters for given tiling index (row) and bpp (column) static constexpr std::array macro_tile_extents{ - std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 07 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0B - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0C - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D - std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E - std::pair{256u, 128u}, std::pair{128u, 128u}, 
std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 10 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 11 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 12 - std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 - std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 - std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 - std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 - std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 - std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18 - std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 - std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A + std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 04 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 07 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0A + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0B + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0C + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0E + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 12 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 + std::pair{128u, 64u}, 
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{128u, 64u}, // 18 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A }; // clang-format on @@ -82,62 +82,65 @@ static constexpr auto hw_pipe_interleave = 256u; static constexpr std::pair<u32, u32> GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) { ASSERT(num_samples == 1); - const auto row = tiling_idx * 4; - const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64 + const auto row = tiling_idx * 5; + const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128 return macro_tile_extents[row + column]; } -static constexpr size_t ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, u32 num_samples) { +static constexpr std::pair<u32, size_t> ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, + u32 num_samples) { const auto pitch_align = std::max(8u, 64u / ((bpp + 7) / 8)); auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); const auto height_aligned = height; - size_t log_sz = 1; - const auto slice_align = std::max(64u, hw_pipe_interleave / (bpp + 7) / 8); + size_t log_sz = pitch_aligned * height_aligned * num_samples; + const auto slice_align = std::max(64u, 256u / ((bpp + 7) / 8)); while (log_sz % slice_align) { - log_sz = pitch_aligned * height_aligned * num_samples; pitch_aligned += pitch_align; + log_sz = pitch_aligned * height_aligned * num_samples; } - return (log_sz * bpp + 7) / 8; + return {pitch_aligned, (log_sz * bpp + 7) / 8}; } -static constexpr size_t ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples) { +static constexpr std::pair<u32, size_t> ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp, + u32 num_samples) { const auto& [pitch_align, height_align] = micro_tile_extent; auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); const auto height_aligned = (height + height_align - 1) & ~(height_align - 1); - size_t log_sz = 1; + size_t log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; while (log_sz % 256) { - log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; pitch_aligned += 8; + log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; } - return log_sz; + return {pitch_aligned, log_sz}; } -static constexpr size_t ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, u32 num_samples, - u32 tiling_idx) { +static constexpr std::pair<u32, size_t> ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, + u32 num_samples, u32 tiling_idx) { const auto& [pitch_align, height_align] = GetMacroTileExtents(tiling_idx, bpp, num_samples); ASSERT(pitch_align != 0 && height_align != 0); const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); const auto height_aligned = (height + height_align - 1) & ~(height_align - 1); - return (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8; + const auto log_sz = pitch_aligned * height_aligned * num_samples; + return {pitch_aligned, (log_sz * bpp + 7) / 8}; } ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr 
cpu_address) noexcept { const auto& attrib = group.attrib; - is_tiled = attrib.tiling_mode == TilingMode::Tile; - tiling_mode = - is_tiled ? AmdGpu::TilingMode::Display_MacroTiled : AmdGpu::TilingMode::Display_Linear; + props.is_tiled = attrib.tiling_mode == TilingMode::Tile; + tiling_mode = props.is_tiled ? AmdGpu::TilingMode::Display_MacroTiled + : AmdGpu::TilingMode::Display_Linear; pixel_format = ConvertPixelFormat(attrib.pixel_format); type = vk::ImageType::e2D; size.width = attrib.width; size.height = attrib.height; pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127); usage.vo_buffer = true; - const bool is_32bpp = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float; - ASSERT(is_32bpp); + num_bits = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float ? 32 : 64; + ASSERT(num_bits == 32); guest_address = cpu_address; - if (!is_tiled) { + if (!props.is_tiled) { guest_size_bytes = pitch * size.height * 4; } else { if (Config::isNeoMode()) { @@ -146,15 +149,16 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4; } } - mips_layout.emplace_back(0, guest_size_bytes); + mips_layout.emplace_back(guest_size_bytes, pitch, 0); } ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { - is_tiled = buffer.IsTiled(); + props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); num_samples = 1 << buffer.attrib.num_fragments_log2; + num_bits = NumBits(buffer.info.format); type = vk::ImageType::e2D; size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height(); @@ -168,15 +172,16 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, guest_address = buffer.Address(); const auto color_slice_sz = buffer.GetColorSliceSize(); guest_size_bytes = color_slice_sz * buffer.NumSlices(); - mips_layout.emplace_back(0, color_slice_sz); + mips_layout.emplace_back(color_slice_sz, pitch, 0); } ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { - is_tiled = false; + props.is_tiled = false; pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); type = vk::ImageType::e2D; num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2 + num_bits = buffer.NumBits(); size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? 
hint.height : buffer.Height(); size.depth = 1; @@ -188,37 +193,38 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); guest_size_bytes = depth_slice_sz * num_slices; - mips_layout.emplace_back(0, depth_slice_sz); + mips_layout.emplace_back(depth_slice_sz, pitch, 0); } ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { - is_tiled = image.IsTiled(); tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); type = ConvertImageType(image.GetType()); - is_cube = image.GetType() == AmdGpu::ImageType::Cube; - is_volume = image.GetType() == AmdGpu::ImageType::Color3D; + props.is_tiled = image.IsTiled(); + props.is_cube = image.GetType() == AmdGpu::ImageType::Cube; + props.is_volume = image.GetType() == AmdGpu::ImageType::Color3D; + props.is_pow2 = image.pow2pad; + props.is_block = IsBlockCoded(); size.width = image.width + 1; size.height = image.height + 1; - size.depth = is_volume ? image.depth + 1 : 1; + size.depth = props.is_volume ? image.depth + 1 : 1; pitch = image.Pitch(); resources.levels = image.NumLevels(); resources.layers = image.NumLayers(); + num_bits = NumBits(image.GetDataFmt()); usage.texture = true; guest_address = image.Address(); mips_layout.reserve(resources.levels); - const auto num_bits = NumBits(image.GetDataFmt()); - const auto is_block = IsBlockCoded(); - const auto is_pow2 = image.pow2pad; + MipInfo mip_info{}; guest_size_bytes = 0; for (auto mip = 0u; mip < resources.levels; ++mip) { auto bpp = num_bits; auto mip_w = pitch >> mip; auto mip_h = size.height >> mip; - if (is_block) { + if (props.is_block) { mip_w = (mip_w + 3) / 4; mip_h = (mip_h + 3) / 4; bpp *= 16; @@ -227,40 +233,48 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { mip_h = std::max(mip_h, 1u); auto mip_d = std::max(size.depth >> mip, 1u); - if (is_pow2) { + if (props.is_pow2) { mip_w = std::bit_ceil(mip_w); mip_h = std::bit_ceil(mip_h); mip_d = std::bit_ceil(mip_d); } - size_t mip_size = 0; switch (tiling_mode) { case AmdGpu::TilingMode::Display_Linear: { - ASSERT(!is_cube); - mip_size = ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples); + ASSERT(!props.is_cube); + std::tie(mip_info.pitch, mip_info.size) = + ImageSizeLinearAligned(mip_w, mip_h, bpp, num_samples); + mip_info.height = mip_h; break; } case AmdGpu::TilingMode::Texture_MicroTiled: { - mip_size = ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples); + std::tie(mip_info.pitch, mip_info.size) = + ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples); + mip_info.height = std::max(mip_h, 8u); + if (props.is_block) { + mip_info.pitch = std::max(mip_info.pitch * 4, 32u); + mip_info.height = std::max(mip_info.height * 4, 32u); + } break; } case AmdGpu::TilingMode::Display_MacroTiled: case AmdGpu::TilingMode::Texture_MacroTiled: case AmdGpu::TilingMode::Depth_MacroTiled: { - ASSERT(!is_cube && !is_block); + ASSERT(!props.is_cube && !props.is_block); ASSERT(num_samples == 1); - ASSERT(num_bits <= 64); - mip_size = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); + std::tie(mip_info.pitch, mip_info.size) = + ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); break; } default: { UNREACHABLE(); } } - mip_size *= mip_d; + mip_info.size *= mip_d; - mips_layout.emplace_back(guest_size_bytes, mip_size); - guest_size_bytes += mip_size; + mip_info.offset = guest_size_bytes; + 
mips_layout.emplace_back(mip_info); + guest_size_bytes += mip_info.size; } guest_size_bytes *= resources.layers; } diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index b98410b9..9dad0dd6 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -9,6 +9,8 @@ #include "video_core/amdgpu/liverpool.h" #include "video_core/texture_cache/types.h" +#include <boost/container/small_vector.hpp> + namespace VideoCore { struct ImageInfo { @@ -42,18 +44,29 @@ struct ImageInfo { u32 vo_buffer : 1; } usage{}; // Usage data tracked during image lifetime - bool is_cube = false; - bool is_volume = false; - bool is_tiled = false; - bool is_read_only = false; + struct { + u32 is_cube : 1; + u32 is_volume : 1; + u32 is_tiled : 1; + u32 is_pow2 : 1; + u32 is_block : 1; + } props{}; // Surface properties with impact on various calculation factors + vk::Format pixel_format = vk::Format::eUndefined; vk::ImageType type = vk::ImageType::e1D; SubresourceExtent resources; Extent3D size{1, 1, 1}; + u32 num_bits{}; u32 num_samples = 1; u32 pitch = 0; AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear}; - std::vector<std::pair<u32, u32>> mips_layout; + struct MipInfo { + u32 size; + u32 pitch; + u32 height; + u32 offset; + }; + boost::container::small_vector<MipInfo, 14> mips_layout; VAddr guest_address{0}; u32 guest_size_bytes{0}; }; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 8f972253..ff85a8aa 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging/log.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/texture_cache/image.h" @@ -50,15 +51,18 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexce : is_storage{is_storage} { type = ConvertImageViewType(image.GetType()); format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); - range.base.level = static_cast<u32>(image.base_level); - range.base.layer = static_cast<u32>(image.base_array); - range.extent.levels = image.NumLevels(); - range.extent.layers = image.NumLayers(); - if (!is_storage) { - mapping.r = ConvertComponentSwizzle(image.dst_sel_x); - mapping.g = ConvertComponentSwizzle(image.dst_sel_y); - mapping.b = ConvertComponentSwizzle(image.dst_sel_z); - mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + range.base.level = image.base_level; + range.base.layer = image.base_array; + range.extent.levels = image.last_level + 1; + range.extent.layers = image.last_array + 1; + mapping.r = ConvertComponentSwizzle(image.dst_sel_x); + mapping.g = ConvertComponentSwizzle(image.dst_sel_y); + mapping.b = ConvertComponentSwizzle(image.dst_sel_z); + mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + // Check for the unfortunate case of storage images being swizzled + if (is_storage && (mapping != vk::ComponentMapping{})) { + LOG_ERROR(Render_Vulkan, "Storage image requires swizzling"); + mapping = vk::ComponentMapping{}; + } } @@ -70,6 +74,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } +ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, + AmdGpu::Liverpool::DepthView view, + 
AmdGpu::Liverpool::DepthControl ctl) { + format = Vulkan::LiverpoolToVK::DepthFormat(depth_buffer.z_info.format, + depth_buffer.stencil_info.format); + is_storage = ctl.depth_write_enable; + range.base.layer = view.slice_start; + range.extent.layers = view.NumSlices(); +} + ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, ImageId image_id_, std::optional<vk::ImageUsageFlags> usage_override /*= {}*/) : info{info_}, image_id{image_id_} { @@ -93,10 +107,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .components = instance.GetSupportedComponentSwizzle(format, info.mapping), .subresourceRange{ .aspectMask = aspect, - .baseMipLevel = 0U, - .levelCount = 1, + .baseMipLevel = info.range.base.level, + .levelCount = info.range.extent.levels - info.range.base.level, .baseArrayLayer = info_.range.base.layer, - .layerCount = image.info.IsBlockCoded() ? 1 : VK_REMAINING_ARRAY_LAYERS, + .layerCount = info.range.extent.layers - info.range.base.layer, }, }; image_view = instance.GetDevice().createImageViewUnique(image_view_ci); diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index b43f65de..590ac9be 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -18,10 +18,11 @@ class Scheduler; namespace VideoCore { struct ImageViewInfo { - explicit ImageViewInfo() = default; - explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; - explicit ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, - bool is_vo_surface) noexcept; + ImageViewInfo() = default; + ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; + ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept; + ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, + AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); vk::ImageViewType type = vk::ImageViewType::e2D; vk::Format format = vk::Format::eR8G8B8A8Unorm; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 55bb99cc..9131e6f1 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -152,8 +152,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { image_id = image_ids[0]; } - RegisterMeta(info, image_id); - Image& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { RefreshImage(image); @@ -184,13 +182,12 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage) { - const ImageInfo info{desc}; +ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(info); Image& image = slot_images[image_id]; auto& usage = image.info.usage; - if (is_storage) { + if (view_info.is_storage) { image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); usage.storage = true; } else { @@ -201,14 +198,36 @@ ImageView& TextureCache::FindTexture(const AmdGpu::Image& desc, bool is_storage) usage.texture = true; } - const ImageViewInfo view_info{desc, is_storage}; - return RegisterImageView(image_id, view_info); + // These changes are temporary and should be removed once the texture cache handles subresource + // 
merging + auto view_info_tmp = view_info; + if (view_info_tmp.range.base.level > image.info.resources.levels - 1 || + view_info_tmp.range.base.layer > image.info.resources.layers - 1 || + view_info_tmp.range.extent.levels > image.info.resources.levels || + view_info_tmp.range.extent.layers > image.info.resources.layers) { + + LOG_ERROR(Render_Vulkan, + "Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})", + view_info_tmp.range.base.level, view_info_tmp.range.extent.levels, + view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers, + image.info.resources.levels, image.info.resources.layers); + + view_info_tmp.range.base.level = + std::min(view_info_tmp.range.base.level, image.info.resources.levels - 1); + view_info_tmp.range.base.layer = + std::min(view_info_tmp.range.base.layer, image.info.resources.layers - 1); + view_info_tmp.range.extent.levels = + std::min(view_info_tmp.range.extent.levels, image.info.resources.levels); + view_info_tmp.range.extent.layers = + std::min(view_info_tmp.range.extent.layers, image.info.resources.layers); + } + + return RegisterImageView(image_id, view_info_tmp); } -ImageView& TextureCache::FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint) { - const ImageInfo info{buffer, hint}; - const ImageId image_id = FindImage(info); +ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info) { + const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; image.flags &= ~ImageFlagBits::CpuModified; @@ -216,30 +235,56 @@ ImageView& TextureCache::FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eColorAttachmentRead); + // Register meta data for this color buffer + if (!(image.flags & ImageFlagBits::MetaRegistered)) { + if (image_info.meta_info.cmask_addr) { + surface_metas.emplace( + image_info.meta_info.cmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); + image.info.meta_info.cmask_addr = image_info.meta_info.cmask_addr; + image.flags |= ImageFlagBits::MetaRegistered; + } + + if (image_info.meta_info.fmask_addr) { + surface_metas.emplace( + image_info.meta_info.fmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); + image.info.meta_info.fmask_addr = image_info.meta_info.fmask_addr; + image.flags |= ImageFlagBits::MetaRegistered; + } + } + + // Update tracked image usage image.info.usage.render_target = true; - ImageViewInfo view_info{buffer, !!image.info.usage.vo_buffer}; return RegisterImageView(image_id, view_info); } -ImageView& TextureCache::FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, - u32 num_slices, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint, - bool write_enabled) { - const ImageInfo info{buffer, num_slices, htile_address, hint}; - const ImageId image_id = FindImage(info, false); +ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info) { + const ImageId image_id = FindImage(image_info, false); Image& image = slot_images[image_id]; image.flags &= ~ImageFlagBits::CpuModified; - const auto new_layout = write_enabled ? vk::ImageLayout::eDepthStencilAttachmentOptimal - : vk::ImageLayout::eDepthStencilReadOnlyOptimal; + const auto new_layout = view_info.is_storage ? 
vk::ImageLayout::eDepthStencilAttachmentOptimal + : vk::ImageLayout::eDepthStencilReadOnlyOptimal; image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite | vk::AccessFlagBits::eDepthStencilAttachmentRead); + // Register meta data for this depth buffer + if (!(image.flags & ImageFlagBits::MetaRegistered)) { + if (image_info.meta_info.htile_addr) { + surface_metas.emplace( + image_info.meta_info.htile_addr, + MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); + image.info.meta_info.htile_addr = image_info.meta_info.htile_addr; + image.flags |= ImageFlagBits::MetaRegistered; + } + } + + // Update tracked image usage image.info.usage.depth_target = true; - ImageViewInfo view_info; - view_info.format = info.pixel_format; return RegisterImageView(image_id, view_info); } @@ -247,64 +292,56 @@ void TextureCache::RefreshImage(Image& image) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; - { - if (!tile_manager.TryDetile(image)) { - // Upload data to the staging buffer. - const auto offset = staging.Copy(image.cpu_addr, image.info.guest_size_bytes, 4); - // Copy to the image. - image.Upload(staging.Handle(), offset); - } + scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); - return; + const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + vk::Buffer buffer{staging.Handle()}; + u32 offset{0}; + + auto upload_buffer = tile_manager.TryDetile(image); + if (upload_buffer) { + buffer = *upload_buffer; + } else { + // Upload data to the staging buffer. + const auto [data, offset_, _] = staging.Map(image.info.guest_size_bytes, 16); + std::memcpy(data, (void*)image.info.guest_address, image.info.guest_size_bytes); + staging.Commit(image.info.guest_size_bytes); + offset = offset_; } - ASSERT(image.info.resources.levels == image.info.mips_layout.size()); - const u8* image_data = reinterpret_cast(image.cpu_addr); - for (u32 m = 0; m < image.info.resources.levels; m++) { + const auto& num_layers = image.info.resources.layers; + const auto& num_mips = image.info.resources.levels; + ASSERT(num_mips == image.info.mips_layout.size()); + + boost::container::small_vector image_copy{}; + for (u32 m = 0; m < num_mips; m++) { const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); - const u32 depth = image.info.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const u32 map_size = image.info.mips_layout[m].second * image.info.resources.layers; + const u32 depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - // Upload data to the staging buffer. - const auto [data, offset, _] = staging.Map(map_size, 16); - if (image.info.is_tiled) { - ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode()); - } else { - std::memcpy(data, - image_data + image.info.mips_layout[m].first * image.info.resources.layers, - map_size); - } - staging.Commit(map_size); - - // Copy to the image. 
- const vk::BufferImageCopy image_copy = { - .bufferOffset = offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, + image_copy.push_back({ + .bufferOffset = offset + mip_ofs * num_layers, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), .imageSubresource{ .aspectMask = vk::ImageAspectFlagBits::eColor, .mipLevel = m, .baseArrayLayer = 0, - .layerCount = u32(image.info.resources.layers), + .layerCount = num_layers, }, .imageOffset = {0, 0, 0}, .imageExtent = {width, height, depth}, - }; - - scheduler.EndRendering(); - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - cmdbuf.copyBufferToImage(staging.Handle(), image.image, - vk::ImageLayout::eTransferDstOptimal, image_copy); - - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + }); } + + cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + + image.Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); } vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) { @@ -320,47 +357,8 @@ void TextureCache::RegisterImage(ImageId image_id) { image.flags |= ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { page_table[page].push_back(image_id); }); -} -void TextureCache::RegisterMeta(const ImageInfo& info, ImageId image_id) { - Image& image = slot_images[image_id]; - - if (image.flags & ImageFlagBits::MetaRegistered) { - return; - } - - bool registered = true; - // Current resource tracking implementation allows us to detect usage of meta only in the last - // moment, so we likely will miss its first clear. To avoid this and make first frame, where - // the meta is encountered, looks correct we set its state to "cleared" at registrations time. - if (info.usage.render_target) { - if (info.meta_info.cmask_addr) { - surface_metas.emplace( - info.meta_info.cmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); - image.info.meta_info.cmask_addr = info.meta_info.cmask_addr; - } - - if (info.meta_info.fmask_addr) { - surface_metas.emplace( - info.meta_info.fmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); - image.info.meta_info.fmask_addr = info.meta_info.fmask_addr; - } - } else if (info.usage.depth_target) { - if (info.meta_info.htile_addr) { - surface_metas.emplace( - info.meta_info.htile_addr, - MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); - image.info.meta_info.htile_addr = info.meta_info.htile_addr; - } - } else { - registered = false; - } - - if (registered) { - image.flags |= ImageFlagBits::MetaRegistered; - } + image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone); } void TextureCache::UnregisterImage(ImageId image_id) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8a618983..aef33bcf 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -51,17 +51,16 @@ public: [[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true); /// Retrieves an image view with the properties of the specified image descriptor. 
- [[nodiscard]] ImageView& FindTexture(const AmdGpu::Image& image, bool is_storage); + [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, + const ImageViewInfo& view_info); /// Retrieves the render target with specified properties - [[nodiscard]] ImageView& FindRenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint); + [[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info); /// Retrieves the depth target with specified properties - [[nodiscard]] ImageView& FindDepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, - u32 num_slices, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint, - bool write_enabled); + [[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info, + const ImageViewInfo& view_info); /// Reuploads image contents. void RefreshImage(Image& image); @@ -158,9 +157,6 @@ private: /// Register image in the page table void RegisterImage(ImageId image); - /// Register metadata surfaces attached to the image - void RegisterMeta(const ImageInfo& info, ImageId image); - /// Unregister image from the page table void UnregisterImage(ImageId image); diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index ace2e4d5..4864b9db 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -16,6 +16,7 @@ #include #include +#include namespace VideoCore { @@ -176,6 +177,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { return vk::Format::eR8Uint; case vk::Format::eR8G8Unorm: case vk::Format::eR16Sfloat: + case vk::Format::eR16Unorm: return vk::Format::eR8G8Uint; case vk::Format::eR8G8B8A8Srgb: case vk::Format::eB8G8R8A8Srgb: @@ -183,10 +185,13 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR32Sfloat: case vk::Format::eR32Uint: + case vk::Format::eR16G16Sfloat: return vk::Format::eR32Uint; case vk::Format::eBc1RgbaUnormBlock: case vk::Format::eBc4UnormBlock: case vk::Format::eR32G32Sfloat: + case vk::Format::eR32G32Uint: + case vk::Format::eR16G16B16A16Unorm: return vk::Format::eR32G32Uint; case vk::Format::eBc2SrgbBlock: case vk::Format::eBc2UnormBlock: @@ -225,14 +230,14 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const { return nullptr; } -static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eUniformBuffer | - vk::BufferUsageFlagBits::eStorageBuffer; +struct DetilerParams { + u32 num_levels; + u32 pitch0; + u32 sizes[14]; +}; TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) - : instance{instance}, scheduler{scheduler}, - staging{instance, scheduler, StagingFlags, 256_MB, Vulkan::BufferType::Upload} { - + : instance{instance}, scheduler{scheduler} { static const std::array detiler_shaders{ HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, @@ -264,7 +269,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc }, { .binding = 1, - .descriptorType = vk::DescriptorType::eStorageImage, + .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eCompute, }, @@ -281,7 +286,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc const vk::PushConstantRange 
push_constants = { .stageFlags = vk::ShaderStageFlagBits::eCompute, .offset = 0, - .size = sizeof(u32), + .size = sizeof(DetilerParams), }; const vk::DescriptorSetLayout set_layout = *desc_layout; @@ -312,35 +317,88 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc TileManager::~TileManager() = default; -bool TileManager::TryDetile(Image& image) { - if (!image.info.is_tiled) { - return false; +TileManager::ScratchBuffer TileManager::AllocBuffer(u32 size, bool is_storage /*= false*/) { + const auto usage = vk::BufferUsageFlagBits::eStorageBuffer | + (is_storage ? vk::BufferUsageFlagBits::eTransferSrc + : vk::BufferUsageFlagBits::eTransferDst); + const vk::BufferCreateInfo buffer_ci{ + .size = size, + .usage = usage, + }; + + const bool is_large_buffer = size > 128_MB; + VmaAllocationCreateInfo alloc_info{ + .flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT + : static_cast<VmaAllocationCreateFlags>(0), + .usage = is_large_buffer ? VMA_MEMORY_USAGE_AUTO_PREFER_HOST + : VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = !is_storage ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + : static_cast<VkMemoryPropertyFlags>(0), + }; + + VkBuffer buffer; + VmaAllocation allocation; + const auto buffer_ci_unsafe = static_cast<VkBufferCreateInfo>(buffer_ci); + const auto result = vmaCreateBuffer(instance.GetAllocator(), &buffer_ci_unsafe, &alloc_info, + &buffer, &allocation, nullptr); + ASSERT(result == VK_SUCCESS); + return {buffer, allocation}; +} + +void TileManager::Upload(ScratchBuffer buffer, const void* data, size_t size) { + VmaAllocationInfo alloc_info{}; + vmaGetAllocationInfo(instance.GetAllocator(), buffer.second, &alloc_info); + ASSERT(size <= alloc_info.size); + void* ptr{}; + const auto result = vmaMapMemory(instance.GetAllocator(), buffer.second, &ptr); + ASSERT(result == VK_SUCCESS); + std::memcpy(ptr, data, size); + vmaUnmapMemory(instance.GetAllocator(), buffer.second); +} + +void TileManager::FreeBuffer(ScratchBuffer buffer) { + vmaDestroyBuffer(instance.GetAllocator(), buffer.first, buffer.second); +} + +std::optional<vk::Buffer> TileManager::TryDetile(Image& image) { + if (!image.info.props.is_tiled) { + return std::nullopt; } const auto* detiler = GetDetiler(image); if (!detiler) { LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); - return false; + return std::nullopt; } - const auto offset = - staging.Copy(image.cpu_addr, image.info.guest_size_bytes, instance.StorageMinAlignment()); - image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); + // Prepare input buffer + auto in_buffer = AllocBuffer(image.info.guest_size_bytes); + Upload(in_buffer, reinterpret_cast<const void*>(image.info.guest_address), + image.info.guest_size_bytes); + + // Prepare output buffer + auto out_buffer = AllocBuffer(image.info.guest_size_bytes, true); + + scheduler.DeferOperation([=, this]() { + FreeBuffer(in_buffer); + FreeBuffer(out_buffer); + }); auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl); const vk::DescriptorBufferInfo input_buffer_info{ - .buffer = staging.Handle(), - .offset = offset, + .buffer = in_buffer.first, + .offset = 0, .range = image.info.guest_size_bytes, }; - ASSERT(image.view_for_detiler.has_value()); - const vk::DescriptorImageInfo output_image_info{ - .imageView = *image.view_for_detiler->image_view, - .imageLayout = image.layout, + const vk::DescriptorBufferInfo output_buffer_info{ + 
.buffer = out_buffer.first, + .offset = 0, + .range = image.info.guest_size_bytes, }; std::vector<vk::WriteDescriptorSet> set_writes{ @@ -357,20 +415,44 @@ bool TileManager::TryDetile(Image& image) { .dstBinding = 1, .dstArrayElement = 0, .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageImage, - .pImageInfo = &output_image_info, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .pBufferInfo = &output_buffer_info, }, }; cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *detiler->pl_layout, 0, set_writes); - cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, - sizeof(image.info.pitch), &image.info.pitch); + DetilerParams params; + params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); + params.num_levels = image.info.resources.levels; - cmdbuf.dispatch((image.info.size.width * image.info.size.height) / 64, 1, - 1); // round to 64 + ASSERT(image.info.resources.levels <= 14); + std::memset(&params.sizes, 0, sizeof(params.sizes)); + for (int m = 0; m < image.info.resources.levels; ++m) { + params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + + (m > 0 ? params.sizes[m - 1] : 0); + } - return true; + auto pitch = image.info.pitch; + cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params), + &params); + + ASSERT((image.info.guest_size_bytes % 64) == 0); + const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u); + const auto num_tiles = image.info.guest_size_bytes / (64 * (bpp / 8)); + cmdbuf.dispatch(num_tiles, 1, 1); + + const vk::BufferMemoryBarrier post_barrier{ .srcAccessMask = vk::AccessFlagBits::eShaderWrite, .dstAccessMask = vk::AccessFlagBits::eTransferRead, .buffer = out_buffer.first, .size = image.info.guest_size_bytes, }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, post_barrier, {}); + + return {out_buffer.first}; } } // namespace VideoCore diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 98a33786..9102da08 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -34,10 +34,16 @@ struct DetilerContext { class TileManager { public: + using ScratchBuffer = std::pair<vk::Buffer, VmaAllocation>; + TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TileManager(); - bool TryDetile(Image& image); + std::optional<vk::Buffer> TryDetile(Image& image); + + ScratchBuffer AllocBuffer(u32 size, bool is_storage = false); + void Upload(ScratchBuffer buffer, const void* data, size_t size); + void FreeBuffer(ScratchBuffer buffer); private: const DetilerContext* GetDetiler(const Image& image) const; @@ -45,7 +51,6 @@ private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; - Vulkan::StreamBuffer staging; std::array detilers; };
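Note (editorial, not part of the patch): the new dw_ofs_x/dw_ofs_y arithmetic in detile_m32x1.comp is easier to verify when read as plain row-major addressing over a surface that holds tiles_per_pitch 8x8 micro-tiles per row. A minimal C++ model under that reading follows; OutputIndexSketch is a hypothetical name used only for illustration.

    #include <cstdint>

    // Row-major model of the m32x1 output index: texel (x, y) of a mip with
    // `tiles_per_pitch` 8x8 micro-tiles per row lands at dword y * (tiles_per_pitch * 8) + x.
    // Expanding y = tile_y * 8 + row and x = tile_x * 8 + col reproduces the shader's terms:
    //   tile_y * tiles_per_pitch * 64 + row * tiles_per_pitch * 8 + tile_x * 8 + col
    uint32_t OutputIndexSketch(uint32_t tile_x, uint32_t tile_y, uint32_t col, uint32_t row,
                               uint32_t tiles_per_pitch) {
        const uint32_t x = tile_x * 8 + col; // dw_ofs_x in the shader
        const uint32_t y = tile_y * 8 + row; // texel row within the mip
        return y * tiles_per_pitch * 8 + x;  // equals dw_ofs_x + dw_ofs_y
    }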
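The push-constant change follows the same pattern in all five shaders: sizes[m] carries the cumulative byte size of mips 0..m across all layers (built in TileManager::TryDetile), and each invocation recovers its mip level with a branchless linear scan against its own byte offset. A sketch of both halves; BuildDetilerSizes and MipFromOffset are hypothetical names for illustration.

    #include <array>
    #include <cstdint>
    #include <vector>

    // CPU side: mirrors `params.sizes[m] = mips_layout[m].size * layers + sizes[m - 1]`.
    std::array<uint32_t, 14> BuildDetilerSizes(const std::vector<uint32_t>& mip_sizes,
                                               uint32_t num_layers) {
        std::array<uint32_t, 14> sizes{};
        uint32_t acc = 0;
        for (size_t m = 0; m < mip_sizes.size() && m < sizes.size(); ++m) {
            acc += mip_sizes[m] * num_layers;
            sizes[m] = acc; // cumulative end offset of mip m in bytes
        }
        return sizes;
    }

    // Shader side: mirrors `mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0;`
    // where the multiplier converts the element index to a byte offset.
    uint32_t MipFromOffset(const std::array<uint32_t, 14>& sizes, uint32_t num_levels,
                           uint32_t byte_ofs) {
        uint32_t mip = 0;
        for (uint32_t m = 0; m < num_levels; ++m) {
            mip += byte_ofs >= sizes[m] ? 1 : 0;
        }
        return mip;
    }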
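Likewise, the reworked size helpers in image_info.cpp now return the aligned pitch together with the byte size, since the detiler needs the per-mip pitch for its buffer-to-image copy regions. The micro-tiled variant widens the pitch in 8-texel steps until the slice hits a 256-byte boundary; a standalone model with a worked example (the 8x8 micro-tile step and 256-byte target come from the diff, the function name is illustrative):

    #include <cstddef>
    #include <cstdint>
    #include <utility>

    // Model of ImageSizeMicroTiled: align pitch/height to the 8x8 micro-tile,
    // then grow the pitch until the slice size is a multiple of 256 bytes.
    std::pair<uint32_t, size_t> MicroTiledSizeSketch(uint32_t pitch, uint32_t height,
                                                     uint32_t bpp, uint32_t num_samples) {
        uint32_t pitch_aligned = (pitch + 7) & ~7u;
        const uint32_t height_aligned = (height + 7) & ~7u;
        size_t log_sz = (size_t{pitch_aligned} * height_aligned * bpp * num_samples + 7) / 8;
        while (log_sz % 256 != 0) {
            pitch_aligned += 8;
            log_sz = (size_t{pitch_aligned} * height_aligned * bpp * num_samples + 7) / 8;
        }
        return {pitch_aligned, log_sz};
    }

    // Example: an 8bpp 20x20 mip aligns to 24x24 = 576 bytes (576 % 256 != 0);
    // widening the pitch to 32 gives 32 * 24 = 768 bytes, which is 256-aligned.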