From 14377b39b51fdd334fde4b609dd9e5b9876d7bc3 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 30 Jun 2024 15:54:59 +0200 Subject: [PATCH] texture_cache: detiler: added missing micro8x2 --- src/video_core/host_shaders/CMakeLists.txt | 1 + src/video_core/host_shaders/detile_m8x2.comp | 61 +++++++++++++++++++ src/video_core/texture_cache/tile_manager.cpp | 10 ++- src/video_core/texture_cache/tile_manager.h | 1 + 4 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 src/video_core/host_shaders/detile_m8x2.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index f9b948c3..f2b6cc2d 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -3,6 +3,7 @@ set(SHADER_FILES detile_m8x1.comp + detile_m8x2.comp detile_m32x1.comp detile_m32x2.comp detile_m32x4.comp diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp new file mode 100644 index 00000000..d93f9a7f --- /dev/null +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +// NOTE: Current subgroup utilization is suboptimal on most GPUs, so +// it would be nice to process two tiles at once here. 
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; // 32 invocations x 2 texels = one 8x8 micro tile per workgroup
+
+layout(std430, binding = 0) buffer input_buf {
+    uint in_data[]; // tiled source; every invocation consumes exactly one 32-bit word
+};
+layout(rg8ui, binding = 1) uniform writeonly uimage2D output_img;
+
+layout(push_constant) uniform image_info {
+    uint pitch; // row pitch; main() divides it by MICRO_TILE_DIM to get the number of tiles per row
+} info;
+
+#define MICRO_TILE_DIM 8
+#define TEXELS_PER_ELEMENT 2
+
+// Inverse morton LUT, small enough to fit into K$; every byte holds (col << 4) | row, 4 positions per word
+uint rmort[16] = {
+    0x11011000, 0x31213020,
+    0x13031202, 0x33233222,
+    0x51415040, 0x71617060,
+    0x53435242, 0x73637262,
+
+    0x15051404, 0x35253424,
+    0x17071606, 0x37273626,
+    0x55455444, 0x75657464,
+    0x57475646, 0x77677666,
+};
+
+void main() { // detiles one 32-bit source word, i.e. TEXELS_PER_ELEMENT two-byte texels
+    uint src_tx = in_data[gl_GlobalInvocationID.x];
+    uint p[TEXELS_PER_ELEMENT] = {
+        (src_tx >> 16) & 0xffff, // upper half is written to the left texel of the pair
+        src_tx & 0xffff
+    };
+
+    uint bit_ofs = 8 * TEXELS_PER_ELEMENT * (gl_LocalInvocationID.x % 2); // 0 or 16 only: a shift >= 32 on a uint is undefined in GLSL
+    uint packed_pos = rmort[gl_LocalInvocationID.x >> 1] >> bit_ofs; // two invocations share one LUT word, each owning 2 of its 4 positions
+    uint col = bitfieldExtract(packed_pos, 4, 4);
+    uint row = bitfieldExtract(packed_pos, 0, 4);
+
+    uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
+    uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
+    uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
+    uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col;
+    uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
+
+    ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
+
+    #pragma unroll
+    for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) { // the two texels of a word are horizontal neighbours
+        uint p0 = (p[ofs] >> 8) & 0xff;
+        uint p1 = p[ofs] & 0xff;
+        imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p0, p1, 0, 0)); // high byte -> R, low byte -> G
+    }
+}
diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp
index b2ff753b..0b6fd0eb 100644
--- a/src/video_core/texture_cache/tile_manager.cpp
+++ b/src/video_core/texture_cache/tile_manager.cpp
@@ -12,6 +12,7 @@
 #include "video_core/host_shaders/detile_m32x2_comp.h"
 #include "video_core/host_shaders/detile_m32x4_comp.h"
 #include "video_core/host_shaders/detile_m8x1_comp.h"
+#include "video_core/host_shaders/detile_m8x2_comp.h" #include #include @@ -177,6 +178,8 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { case vk::Format::eR8Unorm: return vk::Format::eR8Uint; + case vk::Format::eR8G8Unorm: + return vk::Format::eR8G8Uint; case vk::Format::eR8G8B8A8Srgb: [[fallthrough]]; case vk::Format::eB8G8R8A8Srgb: @@ -207,6 +210,8 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const { switch (format) { case vk::Format::eR8Uint: return &detilers[DetilerType::Micro8x1]; + case vk::Format::eR8G8Uint: + return &detilers[DetilerType::Micro8x2]; case vk::Format::eR32Uint: return &detilers[DetilerType::Micro32x1]; case vk::Format::eR32G32Uint: @@ -229,9 +234,8 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} { static const std::array detiler_shaders{ - HostShaders::DETILE_M8X1_COMP, - HostShaders::DETILE_M32X1_COMP, - HostShaders::DETILE_M32X2_COMP, + HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, + HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, HostShaders::DETILE_M32X4_COMP, }; diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 3a74de22..98a33786 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -19,6 +19,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format); enum DetilerType : u32 { Micro8x1, + Micro8x2, Micro32x1, Micro32x2, Micro32x4,