texture_cache: detiler: added missing micro8x2
This commit is contained in:
parent
2386b46726
commit
14377b39b5
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
set(SHADER_FILES
|
set(SHADER_FILES
|
||||||
detile_m8x1.comp
|
detile_m8x1.comp
|
||||||
|
detile_m8x2.comp
|
||||||
detile_m32x1.comp
|
detile_m32x1.comp
|
||||||
detile_m32x2.comp
|
detile_m32x2.comp
|
||||||
detile_m32x4.comp
|
detile_m32x4.comp
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
// NOTE: Current subgroup utilization is suboptimal on most GPUs, so
|
||||||
|
// it would be nice to process two tiles at once here.
|
||||||
|
layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(std430, binding = 0) buffer input_buf {
|
||||||
|
uint in_data[];
|
||||||
|
};
|
||||||
|
layout(rg8ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||||
|
|
||||||
|
layout(push_constant) uniform image_info {
|
||||||
|
uint pitch;
|
||||||
|
} info;
|
||||||
|
|
||||||
|
#define MICRO_TILE_DIM 8
|
||||||
|
#define TEXELS_PER_ELEMENT 2
|
||||||
|
|
||||||
|
// Inverse morton LUT, small enough to fit into K$
// Each 32-bit entry packs four 8-bit texel positions, one per byte. main()
// shifts the selected entry right and then reads the column from bits 4..7
// and the row from bits 0..3 of the low byte. 16 entries x 4 positions cover
// the 64 texels of one MICRO_TILE_DIM x MICRO_TILE_DIM (8x8) tile.
|
||||||
|
uint rmort[16] = {
|
||||||
|
0x11011000, 0x31213020,
|
||||||
|
0x13031202, 0x33233222,
|
||||||
|
0x51415040, 0x71617060,
|
||||||
|
0x53435242, 0x73637262,
|
||||||
|
|
||||||
|
0x15051404, 0x35253424,
|
||||||
|
0x17071606, 0x37273626,
|
||||||
|
0x55455444, 0x75657464,
|
||||||
|
0x57475646, 0x77677666,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Detiles 16-bit (two 8-bit channel) texels of an 8x8 micro tile.
//
// Every invocation loads one 32-bit word from in_data, i.e. two packed texels,
// looks up the position of that texel pair inside the tile via the rmort table
// and stores both texels to horizontally adjacent pixels of output_img.
// The workgroup index selects the destination tile: tiles are laid out
// row-major with (info.pitch / MICRO_TILE_DIM) tiles per row.
void main() {
    uint src_tx = in_data[gl_GlobalInvocationID.x];
    uint p[TEXELS_PER_ELEMENT] = {
        (src_tx >> 16) & 0xffff,
        src_tx & 0xffff
    };

    // rmort is indexed with (id >> 1), so two invocations share one entry:
    // the even one takes the position stored in the low half (shift 0), the
    // odd one the position stored in the high half (shift 16). The half must
    // therefore be chosen by parity. Using (id % 4) here would request shifts
    // of 32 and 48 bits, which are undefined for a 32-bit uint in GLSL.
    uint bit_ofs = 8 * TEXELS_PER_ELEMENT * (gl_LocalInvocationID.x % 2);
    uint packed_pos = rmort[gl_LocalInvocationID.x >> 1] >> bit_ofs;
    uint col = bitfieldExtract(packed_pos, 4, 4); // high nibble: column in tile
    uint row = bitfieldExtract(packed_pos, 0, 4); // low nibble: row in tile

    uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
    uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
    uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
    uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col;
    uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;

    ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);

    // Split each 16-bit texel into its two 8-bit channels and write it out;
    // the second texel of the pair goes one pixel to the right of the first.
    #pragma unroll
    for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) {
        uint p0 = (p[ofs] >> 8) & 0xff;
        uint p1 = p[ofs] & 0xff;
        imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p0, p1, 0, 0));
    }
}
|
|
@ -12,6 +12,7 @@
|
||||||
#include "video_core/host_shaders/detile_m32x2_comp.h"
|
#include "video_core/host_shaders/detile_m32x2_comp.h"
|
||||||
#include "video_core/host_shaders/detile_m32x4_comp.h"
|
#include "video_core/host_shaders/detile_m32x4_comp.h"
|
||||||
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
||||||
|
#include "video_core/host_shaders/detile_m8x2_comp.h"
|
||||||
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
#include <magic_enum.hpp>
|
#include <magic_enum.hpp>
|
||||||
|
@ -177,6 +178,8 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eR8Unorm:
|
case vk::Format::eR8Unorm:
|
||||||
return vk::Format::eR8Uint;
|
return vk::Format::eR8Uint;
|
||||||
|
case vk::Format::eR8G8Unorm:
|
||||||
|
return vk::Format::eR8G8Uint;
|
||||||
case vk::Format::eR8G8B8A8Srgb:
|
case vk::Format::eR8G8B8A8Srgb:
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case vk::Format::eB8G8R8A8Srgb:
|
case vk::Format::eB8G8R8A8Srgb:
|
||||||
|
@ -207,6 +210,8 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eR8Uint:
|
case vk::Format::eR8Uint:
|
||||||
return &detilers[DetilerType::Micro8x1];
|
return &detilers[DetilerType::Micro8x1];
|
||||||
|
case vk::Format::eR8G8Uint:
|
||||||
|
return &detilers[DetilerType::Micro8x2];
|
||||||
case vk::Format::eR32Uint:
|
case vk::Format::eR32Uint:
|
||||||
return &detilers[DetilerType::Micro32x1];
|
return &detilers[DetilerType::Micro32x1];
|
||||||
case vk::Format::eR32G32Uint:
|
case vk::Format::eR32G32Uint:
|
||||||
|
@ -229,9 +234,8 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
|
||||||
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
|
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
|
||||||
|
|
||||||
static const std::array detiler_shaders{
|
static const std::array detiler_shaders{
|
||||||
HostShaders::DETILE_M8X1_COMP,
|
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
|
||||||
HostShaders::DETILE_M32X1_COMP,
|
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
|
||||||
HostShaders::DETILE_M32X2_COMP,
|
|
||||||
HostShaders::DETILE_M32X4_COMP,
|
HostShaders::DETILE_M32X4_COMP,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format);
|
||||||
|
|
||||||
enum DetilerType : u32 {
|
enum DetilerType : u32 {
|
||||||
Micro8x1,
|
Micro8x1,
|
||||||
|
Micro8x2,
|
||||||
Micro32x1,
|
Micro32x1,
|
||||||
Micro32x2,
|
Micro32x2,
|
||||||
Micro32x4,
|
Micro32x4,
|
||||||
|
|
Loading…
Reference in New Issue