Merge pull request #183 from shadps4-emu/bc_detiler
Micro detiler for 2D BC textures
This commit is contained in:
commit
0c1ede1060
|
@ -3,7 +3,9 @@
|
||||||
|
|
||||||
set(SHADER_FILES
|
set(SHADER_FILES
|
||||||
detile_m8x1.comp
|
detile_m8x1.comp
|
||||||
detile_m8x4.comp
|
detile_m32x1.comp
|
||||||
|
detile_m32x2.comp
|
||||||
|
detile_m32x4.comp
|
||||||
)
|
)
|
||||||
|
|
||||||
set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
|
set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||||
|
|
|
@ -8,14 +8,14 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||||
layout(std430, binding = 0) buffer input_buf {
|
layout(std430, binding = 0) buffer input_buf {
|
||||||
uint in_data[];
|
uint in_data[];
|
||||||
};
|
};
|
||||||
layout(rgba8ui, binding = 1) uniform writeonly uimage2D output_img;
|
layout(r32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||||
|
|
||||||
layout(push_constant) uniform image_info {
|
layout(push_constant) uniform image_info {
|
||||||
uint pitch;
|
uint pitch;
|
||||||
} info;
|
} info;
|
||||||
|
|
||||||
// Inverse morton LUT, small enough to fit into K$
|
// Inverse morton LUT, small enough to fit into K$
|
||||||
uint lut_8x4[16] = {
|
uint rmort[16] = {
|
||||||
0x11011000, 0x31213020,
|
0x11011000, 0x31213020,
|
||||||
0x13031202, 0x33233222,
|
0x13031202, 0x33233222,
|
||||||
0x51415040, 0x71617060,
|
0x51415040, 0x71617060,
|
||||||
|
@ -27,17 +27,17 @@ uint lut_8x4[16] = {
|
||||||
0x57475646, 0x77677666,
|
0x57475646, 0x77677666,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define MICRO_TILE_DIM 8
|
#define MICRO_TILE_DIM (8)
|
||||||
#define TEXELS_PER_ELEMENT 1
|
#define TEXELS_PER_ELEMENT (1)
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint src_tx = in_data[gl_GlobalInvocationID.x];
|
|
||||||
|
|
||||||
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||||
uint packed_pos = lut_8x4[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||||
uint col = bitfieldExtract(packed_pos, 4, 4);
|
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||||
uint row = bitfieldExtract(packed_pos, 0, 4);
|
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||||
|
|
||||||
|
uint p0 = in_data[gl_GlobalInvocationID.x];
|
||||||
|
|
||||||
uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
|
uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
|
||||||
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
|
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
|
||||||
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
|
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;
|
||||||
|
@ -46,12 +46,5 @@ void main() {
|
||||||
uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
|
uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row;
|
||||||
|
|
||||||
ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
|
ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y);
|
||||||
|
imageStore(output_img, img_pos, uvec4(p0, 0, 0, 0));
|
||||||
uvec4 dst_tx = uvec4(
|
|
||||||
bitfieldExtract(src_tx, 0, 8),
|
|
||||||
bitfieldExtract(src_tx, 8, 8),
|
|
||||||
bitfieldExtract(src_tx, 16, 8),
|
|
||||||
bitfieldExtract(src_tx, 24, 8)
|
|
||||||
);
|
|
||||||
imageStore(output_img, img_pos, dst_tx);
|
|
||||||
}
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(std430, binding = 0) buffer input_buf {
|
||||||
|
uint in_data[];
|
||||||
|
};
|
||||||
|
layout(rg32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||||
|
|
||||||
|
layout(push_constant) uniform image_info {
|
||||||
|
uint pitch;
|
||||||
|
} info;
|
||||||
|
|
||||||
|
// Inverse morton LUT, small enough to fit into K$
// Each 32-bit entry packs four texel positions, one per byte (lowest byte
// first). Within a byte, bits 7:4 hold the column and bits 3:0 hold the row,
// which is how main() decodes it with bitfieldExtract. 16 entries * 4 bytes
// cover the 64 invocations of one workgroup.
|
||||||
|
uint rmort[16] = {
|
||||||
|
0x11011000, 0x31213020,
|
||||||
|
0x13031202, 0x33233222,
|
||||||
|
0x51415040, 0x71617060,
|
||||||
|
0x53435242, 0x73637262,
|
||||||
|
|
||||||
|
0x15051404, 0x35253424,
|
||||||
|
0x17071606, 0x37273626,
|
||||||
|
0x55455444, 0x75657464,
|
||||||
|
0x57475646, 0x77677666,
|
||||||
|
};
|
||||||
|
|
||||||
|
#define MICRO_TILE_DIM (8)
|
||||||
|
|
||||||
|
// Detiles one 64-bit element per invocation.
// - The local invocation index selects a byte of the rmort LUT, which yields
//   the (col, row) position inside the 8x8 micro tile.
// - Two consecutive dwords are read from in_data at 2 * global invocation id.
// - They are stored as (p0, p1, 0, 0) into output_img at the tile origin
//   (derived from the workgroup index and the pitch) plus (col, row).
// NOTE(review): tiles_per_pitch evaluates to 0 when info.pitch < 32, which
// makes the % and / below divide by zero - confirm callers guarantee
// pitch >= 32.
void main() {
|
||||||
|
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||||
|
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||||
|
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||||
|
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||||
|
|
||||||
|
uint block_ofs = 2 * gl_GlobalInvocationID.x;
|
||||||
|
uint p0 = in_data[block_ofs + 0];
|
||||||
|
uint p1 = in_data[block_ofs + 1];
|
||||||
|
|
||||||
|
uint tiles_per_pitch = (info.pitch >> 3) >> 2; // pitch / MICRO_TILE_DIM (>> 3), then / 4 (>> 2); presumably 4 = texels per block side - TODO confirm
|
||||||
|
ivec2 img_pos = MICRO_TILE_DIM * ivec2(
|
||||||
|
gl_WorkGroupID.x % tiles_per_pitch,
|
||||||
|
gl_WorkGroupID.x / tiles_per_pitch
|
||||||
|
);
|
||||||
|
imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, 0, 0));
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(std430, binding = 0) buffer input_buf {
|
||||||
|
uint in_data[];
|
||||||
|
};
|
||||||
|
layout(rgba32ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||||
|
|
||||||
|
layout(push_constant) uniform image_info {
|
||||||
|
uint pitch;
|
||||||
|
} info;
|
||||||
|
|
||||||
|
// Inverse morton LUT, small enough to fit into K$
// Each 32-bit entry packs four texel positions, one per byte (lowest byte
// first). Within a byte, bits 7:4 hold the column and bits 3:0 hold the row,
// which is how main() decodes it with bitfieldExtract. 16 entries * 4 bytes
// cover the 64 invocations of one workgroup.
|
||||||
|
uint rmort[16] = {
|
||||||
|
0x11011000, 0x31213020,
|
||||||
|
0x13031202, 0x33233222,
|
||||||
|
0x51415040, 0x71617060,
|
||||||
|
0x53435242, 0x73637262,
|
||||||
|
|
||||||
|
0x15051404, 0x35253424,
|
||||||
|
0x17071606, 0x37273626,
|
||||||
|
0x55455444, 0x75657464,
|
||||||
|
0x57475646, 0x77677666,
|
||||||
|
};
|
||||||
|
|
||||||
|
#define MICRO_TILE_DIM (8)
|
||||||
|
|
||||||
|
// Detiles one 128-bit element per invocation.
// - The local invocation index selects a byte of the rmort LUT, which yields
//   the (col, row) position inside the 8x8 micro tile.
// - Four consecutive dwords are read from in_data at 4 * global invocation id.
// - They are stored as (p0, p1, p2, p3) into output_img at the tile origin
//   (derived from the workgroup index and the pitch) plus (col, row).
// NOTE(review): tiles_per_pitch evaluates to 0 when info.pitch < 32, which
// makes the % and / below divide by zero - confirm callers guarantee
// pitch >= 32.
void main() {
|
||||||
|
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
|
||||||
|
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
|
||||||
|
uint col = bitfieldExtract(packed_pos, 4, 4);
|
||||||
|
uint row = bitfieldExtract(packed_pos, 0, 4);
|
||||||
|
|
||||||
|
uint block_ofs = 4 * gl_GlobalInvocationID.x;
|
||||||
|
uint p0 = in_data[block_ofs + 0];
|
||||||
|
uint p1 = in_data[block_ofs + 1];
|
||||||
|
uint p2 = in_data[block_ofs + 2];
|
||||||
|
uint p3 = in_data[block_ofs + 3];
|
||||||
|
|
||||||
|
uint tiles_per_pitch = (info.pitch >> 3) >> 2; // pitch / MICRO_TILE_DIM (>> 3), then / 4 (>> 2); presumably 4 = texels per block side - TODO confirm
|
||||||
|
ivec2 img_pos = MICRO_TILE_DIM * ivec2(
|
||||||
|
gl_WorkGroupID.x % tiles_per_pitch,
|
||||||
|
gl_WorkGroupID.x / tiles_per_pitch
|
||||||
|
);
|
||||||
|
imageStore(output_img, img_pos + ivec2(col, row), uvec4(p0, p1, p2, p3));
|
||||||
|
}
|
|
@ -33,8 +33,32 @@ static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool IsDepthStencilFormat(vk::Format format) {
|
bool ImageInfo::IsBlockCoded() const {
|
||||||
switch (format) {
|
switch (pixel_format) {
|
||||||
|
case vk::Format::eBc1RgbaSrgbBlock:
|
||||||
|
case vk::Format::eBc1RgbaUnormBlock:
|
||||||
|
case vk::Format::eBc1RgbSrgbBlock:
|
||||||
|
case vk::Format::eBc1RgbUnormBlock:
|
||||||
|
case vk::Format::eBc2SrgbBlock:
|
||||||
|
case vk::Format::eBc2UnormBlock:
|
||||||
|
case vk::Format::eBc3SrgbBlock:
|
||||||
|
case vk::Format::eBc3UnormBlock:
|
||||||
|
case vk::Format::eBc4SnormBlock:
|
||||||
|
case vk::Format::eBc4UnormBlock:
|
||||||
|
case vk::Format::eBc5SnormBlock:
|
||||||
|
case vk::Format::eBc5UnormBlock:
|
||||||
|
case vk::Format::eBc6HSfloatBlock:
|
||||||
|
case vk::Format::eBc6HUfloatBlock:
|
||||||
|
case vk::Format::eBc7SrgbBlock:
|
||||||
|
case vk::Format::eBc7UnormBlock:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ImageInfo::IsDepthStencil() const {
|
||||||
|
switch (pixel_format) {
|
||||||
case vk::Format::eD16Unorm:
|
case vk::Format::eD16Unorm:
|
||||||
case vk::Format::eD16UnormS8Uint:
|
case vk::Format::eD16UnormS8Uint:
|
||||||
case vk::Format::eD32Sfloat:
|
case vk::Format::eD32Sfloat:
|
||||||
|
@ -45,17 +69,20 @@ static bool IsDepthStencilFormat(vk::Format format) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
|
static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
|
||||||
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
|
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
|
||||||
vk::ImageUsageFlagBits::eTransferDst |
|
vk::ImageUsageFlagBits::eTransferDst |
|
||||||
vk::ImageUsageFlagBits::eSampled;
|
vk::ImageUsageFlagBits::eSampled;
|
||||||
if (IsDepthStencilFormat(format)) {
|
if (info.IsDepthStencil()) {
|
||||||
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||||
} else {
|
} else {
|
||||||
if (format != vk::Format::eBc3SrgbBlock) {
|
if (!info.IsBlockCoded()) {
|
||||||
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
usage |= vk::ImageUsageFlagBits::eColorAttachment;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (info.is_tiled || info.is_storage) {
|
||||||
|
usage |= vk::ImageUsageFlagBits::eStorage;
|
||||||
|
}
|
||||||
return usage;
|
return usage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -179,15 +206,12 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
}
|
}
|
||||||
if (info.is_tiled) {
|
if (info.is_tiled) {
|
||||||
flags |= vk::ImageCreateFlagBits::eExtendedUsage;
|
flags |= vk::ImageCreateFlagBits::eExtendedUsage;
|
||||||
if (false) { // IsBlockCodedFormat()
|
if (info.IsBlockCoded()) {
|
||||||
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
|
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info.usage = ImageUsageFlags(info.pixel_format);
|
info.usage = ImageUsageFlags(info);
|
||||||
if (info.is_tiled || info.is_storage) {
|
|
||||||
info.usage |= vk::ImageUsageFlagBits::eStorage;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.pixel_format == vk::Format::eD32Sfloat) {
|
if (info.pixel_format == vk::Format::eD32Sfloat) {
|
||||||
aspect_mask = vk::ImageAspectFlagBits::eDepth;
|
aspect_mask = vk::ImageAspectFlagBits::eDepth;
|
||||||
|
|
|
@ -42,6 +42,9 @@ struct ImageInfo {
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
||||||
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
||||||
|
|
||||||
|
bool IsBlockCoded() const;
|
||||||
|
bool IsDepthStencil() const;
|
||||||
|
|
||||||
bool is_tiled = false;
|
bool is_tiled = false;
|
||||||
bool is_storage = false;
|
bool is_storage = false;
|
||||||
vk::Format pixel_format = vk::Format::eUndefined;
|
vk::Format pixel_format = vk::Format::eUndefined;
|
||||||
|
|
|
@ -84,7 +84,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
|
||||||
.baseMipLevel = 0U,
|
.baseMipLevel = 0U,
|
||||||
.levelCount = 1,
|
.levelCount = 1,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
.layerCount = image.info.IsBlockCoded() ? 1 : VK_REMAINING_ARRAY_LAYERS,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
image_view = instance.GetDevice().createImageViewUnique(image_view_ci);
|
image_view = instance.GetDevice().createImageViewUnique(image_view_ci);
|
||||||
|
|
|
@ -8,8 +8,10 @@
|
||||||
#include "video_core/texture_cache/texture_cache.h"
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
#include "video_core/texture_cache/tile_manager.h"
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
|
||||||
|
#include "video_core/host_shaders/detile_m32x1_comp.h"
|
||||||
|
#include "video_core/host_shaders/detile_m32x2_comp.h"
|
||||||
|
#include "video_core/host_shaders/detile_m32x4_comp.h"
|
||||||
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
||||||
#include "video_core/host_shaders/detile_m8x4_comp.h"
|
|
||||||
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
#include <magic_enum.hpp>
|
#include <magic_enum.hpp>
|
||||||
|
@ -174,27 +176,39 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_
|
||||||
|
|
||||||
vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eB8G8R8A8Srgb:
|
|
||||||
case vk::Format::eR8G8B8A8Unorm:
|
|
||||||
return vk::Format::eR8G8B8A8Uint;
|
|
||||||
case vk::Format::eR8Unorm:
|
case vk::Format::eR8Unorm:
|
||||||
return vk::Format::eR8Uint;
|
return vk::Format::eR8Uint;
|
||||||
|
case vk::Format::eB8G8R8A8Srgb:
|
||||||
|
[[fallthrough]];
|
||||||
|
case vk::Format::eR8G8B8A8Unorm:
|
||||||
|
return vk::Format::eR32Uint;
|
||||||
|
case vk::Format::eBc1RgbaUnormBlock:
|
||||||
|
return vk::Format::eR32G32Uint;
|
||||||
|
case vk::Format::eBc3SrgbBlock:
|
||||||
|
[[fallthrough]];
|
||||||
|
case vk::Format::eBc3UnormBlock:
|
||||||
|
return vk::Format::eR32G32B32A32Uint;
|
||||||
default:
|
default:
|
||||||
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
const DetilerContext* TileManager::GetDetiler(const Image& image) const {
|
const DetilerContext* TileManager::GetDetiler(const Image& image) const {
|
||||||
const auto format = DemoteImageFormatForDetiling(image.info.pixel_format);
|
const auto format = DemoteImageFormatForDetiling(image.info.pixel_format);
|
||||||
|
|
||||||
if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled) {
|
if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled ||
|
||||||
|
image.info.tiling_mode == AmdGpu::TilingMode::Depth_MicroTiled) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case vk::Format::eR8Uint:
|
case vk::Format::eR8Uint:
|
||||||
return &detilers[DetilerType::Micro8x1];
|
return &detilers[DetilerType::Micro8x1];
|
||||||
case vk::Format::eR8G8B8A8Uint:
|
case vk::Format::eR32Uint:
|
||||||
return &detilers[DetilerType::Micro8x4];
|
return &detilers[DetilerType::Micro32x1];
|
||||||
|
case vk::Format::eR32G32Uint:
|
||||||
|
return &detilers[DetilerType::Micro32x2];
|
||||||
|
case vk::Format::eR32G32B32A32Uint:
|
||||||
|
return &detilers[DetilerType::Micro32x4];
|
||||||
default:
|
default:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -211,7 +225,9 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc
|
||||||
|
|
||||||
static const std::array detiler_shaders{
|
static const std::array detiler_shaders{
|
||||||
HostShaders::DETILE_M8X1_COMP,
|
HostShaders::DETILE_M8X1_COMP,
|
||||||
HostShaders::DETILE_M8X4_COMP,
|
HostShaders::DETILE_M32X1_COMP,
|
||||||
|
HostShaders::DETILE_M32X2_COMP,
|
||||||
|
HostShaders::DETILE_M32X4_COMP,
|
||||||
};
|
};
|
||||||
|
|
||||||
for (int pl_id = 0; pl_id < DetilerType::Max; ++pl_id) {
|
for (int pl_id = 0; pl_id < DetilerType::Max; ++pl_id) {
|
||||||
|
|
|
@ -19,7 +19,9 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format);
|
||||||
|
|
||||||
enum DetilerType : u32 {
|
enum DetilerType : u32 {
|
||||||
Micro8x1,
|
Micro8x1,
|
||||||
Micro8x4,
|
Micro32x1,
|
||||||
|
Micro32x2,
|
||||||
|
Micro32x4,
|
||||||
|
|
||||||
Max
|
Max
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue