From 01f09cfa6332d91982305a6e6874cb3a840b1bce Mon Sep 17 00:00:00 2001
From: psucien
Date: Sun, 9 Jun 2024 18:18:49 +0200
Subject: [PATCH] texture_cache: detiler: added support for block coded 64bit images

---
 src/video_core/host_shaders/CMakeLists.txt    |  1 +
 src/video_core/host_shaders/detile_m32x2.comp | 48 +++++++++++++++++++
 src/video_core/texture_cache/tile_manager.cpp | 14 ++++--
 src/video_core/texture_cache/tile_manager.h   |  1 +
 4 files changed, 61 insertions(+), 3 deletions(-)
 create mode 100644 src/video_core/host_shaders/detile_m32x2.comp

diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index b3701fb6..f9b948c3 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -4,6 +4,7 @@
 set(SHADER_FILES
     detile_m8x1.comp
     detile_m32x1.comp
+    detile_m32x2.comp
     detile_m32x4.comp
 )
 
diff --git a/src/video_core/host_shaders/detile_m32x2.comp b/src/video_core/host_shaders/detile_m32x2.comp
new file mode 100644
index 00000000..2853f8b7
--- /dev/null
+++ b/src/video_core/host_shaders/detile_m32x2.comp
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 450
+
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; // 64 invocations per workgroup: one per element of an 8x8 micro tile
+
+layout(std430, binding = 0) buffer input_buf {
+    uint in_data[]; // source data, read as 32-bit words
+};
+layout(rg32ui, binding = 1) uniform writeonly uimage2D output_img; // destination: two 32-bit channels per texel
+
+layout(push_constant) uniform image_info {
+    uint pitch; // NOTE(review): units are not visible here; main() divides this by 8 * 4 to get micro tiles per row - confirm against the caller
+} info;
+
+// Inverse morton LUT, small enough to fit into K$. Each word packs four bytes, one per invocation: high nibble = column, low nibble = row.
+uint rmort[16] = {
+    0x11011000, 0x31213020,
+    0x13031202, 0x33233222,
+    0x51415040, 0x71617060,
+    0x53435242, 0x73637262,
+
+    0x15051404, 0x35253424,
+    0x17071606, 0x37273626,
+    0x55455444, 0x75657464,
+    0x57475646, 0x77677666,
+};
+
+#define MICRO_TILE_DIM (8)
+
+void main() {
+    uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4); // byte lane of this invocation inside its LUT word
+    uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs; // four consecutive invocations share one LUT word
+    uint col = bitfieldExtract(packed_pos, 4, 4); // bits 4..7 of the selected byte
+    uint row = bitfieldExtract(packed_pos, 0, 4); // bits 0..3 of the selected byte
+
+    uint block_ofs = 2 * gl_GlobalInvocationID.x; // each invocation consumes two consecutive 32-bit words (64 bits)
+    uint p0 = in_data[block_ofs + 0];
+    uint p1 = in_data[block_ofs + 1];
+
+    uint tiles_per_pitch = (info.pitch >> 3) >> 2; // >> 3 divides by MICRO_TILE_DIM (8); >> 2 divides by 4 (presumably the 4-texel width of a compressed block - confirm)
+    ivec2 img_pos = MICRO_TILE_DIM * ivec2( // origin of this workgroup's micro tile; workgroup index is unpacked row-major
+        gl_WorkGroupID.x % tiles_per_pitch,
+        gl_WorkGroupID.x / tiles_per_pitch
+    );
+    imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, 0, 0)); // p0 -> R, p1 -> G; the last two components are not stored by an rg32ui image
+}
\ No newline at end of file
diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp
index 6d3ee1f3..36e1d1e1 100644
--- a/src/video_core/texture_cache/tile_manager.cpp
+++ b/src/video_core/texture_cache/tile_manager.cpp
@@ -9,6 +9,7 @@
 #include "video_core/texture_cache/tile_manager.h"
 
 #include "video_core/host_shaders/detile_m32x1_comp.h"
+#include "video_core/host_shaders/detile_m32x2_comp.h"
 #include "video_core/host_shaders/detile_m32x4_comp.h"
 #include "video_core/host_shaders/detile_m8x1_comp.h"
 
@@ -175,12 +176,16 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_
 
 vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     switch (format) {
-    case vk::Format::eB8G8R8A8Srgb:
-    case vk::Format::eR8G8B8A8Unorm:
-        return vk::Format::eR32Uint;
     case vk::Format::eR8Unorm:
         return vk::Format::eR8Uint;
+    case vk::Format::eB8G8R8A8Srgb:
+        [[fallthrough]];
+    case vk::Format::eR8G8B8A8Unorm:
+        return vk::Format::eR32Uint; // same result as before this change; the cases were only reordered
+    case vk::Format::eBc1RgbaUnormBlock:
+        return vk::Format::eR32G32Uint; // eR32G32Uint is the format GetDetiler maps to DetilerType::Micro32x2
     case vk::Format::eBc3SrgbBlock:
+        [[fallthrough]];
     case vk::Format::eBc3UnormBlock:
         return vk::Format::eR32G32B32A32Uint;
     default:
@@ -200,6 +205,8 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const {
         return &detilers[DetilerType::Micro8x1];
     case vk::Format::eR32Uint:
         return &detilers[DetilerType::Micro32x1];
+    case vk::Format::eR32G32Uint: // the format DemoteImageFormatForDetiling returns for eBc1RgbaUnormBlock
+        return &detilers[DetilerType::Micro32x2];
     case vk::Format::eR32G32B32A32Uint:
         return &detilers[DetilerType::Micro32x4];
     default:
@@ -219,6 +226,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
     static const std::array detiler_shaders{
         HostShaders::DETILE_M8X1_COMP,
         HostShaders::DETILE_M32X1_COMP,
+        HostShaders::DETILE_M32X2_COMP, // NOTE(review): presumably indexed by DetilerType, so the position must match the enum order - confirm
         HostShaders::DETILE_M32X4_COMP,
     };
 
diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h
index b29cfb97..3a74de22 100644
--- a/src/video_core/texture_cache/tile_manager.h
+++ b/src/video_core/texture_cache/tile_manager.h
@@ -20,6 +20,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format);
 enum DetilerType : u32 {
     Micro8x1,
     Micro32x1,
+    Micro32x2, // inserted between Micro32x1 and Micro32x4, mirroring the detiler_shaders order in tile_manager.cpp
     Micro32x4,
 
     Max