texture_cache: detiler: added support for block-coded 64-bit images

This commit is contained in:
psucien 2024-06-09 18:18:49 +02:00
parent fa73812f32
commit 01f09cfa63
4 changed files with 61 additions and 3 deletions

View File

@ -4,6 +4,7 @@
set(SHADER_FILES set(SHADER_FILES
detile_m8x1.comp detile_m8x1.comp
detile_m32x1.comp detile_m32x1.comp
detile_m32x2.comp
detile_m32x4.comp detile_m32x4.comp
) )

View File

@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
// One workgroup is 64 invocations laid out along X; main() maps each
// invocation to one position inside an 8x8 micro tile.
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
// Source data as a flat array of 32-bit words. main() reads two
// consecutive words (64 bits) per invocation.
layout(std430, binding = 0) buffer input_buf {
uint in_data[];
};
// Destination image: write-only, two 32-bit unsigned channels per element,
// i.e. one 64-bit element per imageStore.
layout(rg32ui, binding = 1) uniform writeonly uimage2D output_img;
// Push constants supplied by the host.
layout(push_constant) uniform image_info {
// NOTE(review): presumably the image row pitch in texels; main() divides it
// by 8 and then by 4 to get micro tiles per row - confirm against the host code.
uint pitch;
} info;
// Inverse morton LUT, small enough to fit into K$
//
// Layout: 16 words x 4 bytes = 64 entries, one per invocation of the
// workgroup. Invocation i uses word (i >> 2), byte (i % 4), with byte 0 in
// the least significant bits. Within each byte the high nibble is the column
// and the low nibble is the row inside the 8x8 micro tile, e.g. the first
// word 0x11011000 decodes to (col,row) = (0,0), (1,0), (0,1), (1,1) for
// invocations 0..3.
uint rmort[16] = {
0x11011000, 0x31213020,
0x13031202, 0x33233222,
0x51415040, 0x71617060,
0x53435242, 0x73637262,
0x15051404, 0x35253424,
0x17071606, 0x37273626,
0x55455444, 0x75657464,
0x57475646, 0x77677666,
};
#define MICRO_TILE_DIM (8)
// Detiles one 64-bit element per invocation.
//
// Each workgroup handles one 8x8 micro tile: the invocation's local index is
// translated through the inverse Morton LUT into a (col, row) position inside
// the tile, the workgroup index selects the tile inside the image, and the
// two 32-bit words read from the input buffer are stored at that position.
void main() {
    // Every LUT word packs four positions; pick the byte for this invocation.
    uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
    uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
    uint col = bitfieldExtract(packed_pos, 4, 4); // high nibble
    uint row = bitfieldExtract(packed_pos, 0, 4); // low nibble

    // Input elements are 64 bits wide, i.e. two consecutive 32-bit words.
    uint block_ofs = 2 * gl_GlobalInvocationID.x;
    uint p0 = in_data[block_ofs + 0];
    uint p1 = in_data[block_ofs + 1];

    // pitch / MICRO_TILE_DIM (>> 3), then / 4 (>> 2).
    // NOTE(review): the extra division by 4 presumably converts texels into
    // 4x4 compressed blocks - confirm against the host code.
    // Clamp to at least one tile per row: for pitch < 32 the shifts yield 0,
    // and the modulo/division below would then have an undefined result.
    uint tiles_per_pitch = max((info.pitch >> 3) >> 2, 1u);
    ivec2 img_pos = MICRO_TILE_DIM * ivec2(
        gl_WorkGroupID.x % tiles_per_pitch,
        gl_WorkGroupID.x / tiles_per_pitch
    );
    imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, 0, 0));
}

View File

@ -9,6 +9,7 @@
#include "video_core/texture_cache/tile_manager.h" #include "video_core/texture_cache/tile_manager.h"
#include "video_core/host_shaders/detile_m32x1_comp.h" #include "video_core/host_shaders/detile_m32x1_comp.h"
#include "video_core/host_shaders/detile_m32x2_comp.h"
#include "video_core/host_shaders/detile_m32x4_comp.h" #include "video_core/host_shaders/detile_m32x4_comp.h"
#include "video_core/host_shaders/detile_m8x1_comp.h" #include "video_core/host_shaders/detile_m8x1_comp.h"
@ -175,12 +176,16 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_
vk::Format DemoteImageFormatForDetiling(vk::Format format) { vk::Format DemoteImageFormatForDetiling(vk::Format format) {
switch (format) { switch (format) {
case vk::Format::eB8G8R8A8Srgb:
case vk::Format::eR8G8B8A8Unorm:
return vk::Format::eR32Uint;
case vk::Format::eR8Unorm: case vk::Format::eR8Unorm:
return vk::Format::eR8Uint; return vk::Format::eR8Uint;
case vk::Format::eB8G8R8A8Srgb:
[[fallthrough]];
case vk::Format::eR8G8B8A8Unorm:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock:
return vk::Format::eR32G32Uint;
case vk::Format::eBc3SrgbBlock: case vk::Format::eBc3SrgbBlock:
[[fallthrough]];
case vk::Format::eBc3UnormBlock: case vk::Format::eBc3UnormBlock:
return vk::Format::eR32G32B32A32Uint; return vk::Format::eR32G32B32A32Uint;
default: default:
@ -200,6 +205,8 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const {
return &detilers[DetilerType::Micro8x1]; return &detilers[DetilerType::Micro8x1];
case vk::Format::eR32Uint: case vk::Format::eR32Uint:
return &detilers[DetilerType::Micro32x1]; return &detilers[DetilerType::Micro32x1];
case vk::Format::eR32G32Uint:
return &detilers[DetilerType::Micro32x2];
case vk::Format::eR32G32B32A32Uint: case vk::Format::eR32G32B32A32Uint:
return &detilers[DetilerType::Micro32x4]; return &detilers[DetilerType::Micro32x4];
default: default:
@ -219,6 +226,7 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
static const std::array detiler_shaders{ static const std::array detiler_shaders{
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X1_COMP,
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X1_COMP,
HostShaders::DETILE_M32X2_COMP,
HostShaders::DETILE_M32X4_COMP, HostShaders::DETILE_M32X4_COMP,
}; };

View File

@ -20,6 +20,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format);
enum DetilerType : u32 { enum DetilerType : u32 {
Micro8x1, Micro8x1,
Micro32x1, Micro32x1,
Micro32x2,
Micro32x4, Micro32x4,
Max Max