Merge pull request #163 from shadps4-emu/video_core/surface_works
Basic detiling
This commit is contained in:
commit
7d61b7ab9b
|
@ -530,6 +530,13 @@ endif()
|
||||||
|
|
||||||
target_include_directories(shadps4 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
target_include_directories(shadps4 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
|
# Shaders sources
|
||||||
|
set(HOST_SHADERS_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/src/video_core/host_shaders)
|
||||||
|
|
||||||
|
add_subdirectory(${HOST_SHADERS_INCLUDE})
|
||||||
|
add_dependencies(shadps4 host_shaders)
|
||||||
|
target_include_directories(shadps4 PRIVATE ${HOST_SHADERS_INCLUDE})
|
||||||
|
|
||||||
if (ENABLE_QT_GUI)
|
if (ENABLE_QT_GUI)
|
||||||
set_target_properties(shadps4 PROPERTIES
|
set_target_properties(shadps4 PROPERTIES
|
||||||
WIN32_EXECUTABLE ON
|
WIN32_EXECUTABLE ON
|
||||||
|
|
|
@ -1462,7 +1462,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
|
||||||
if (Config::dumpPM4()) {
|
if (Config::dumpPM4()) {
|
||||||
static auto last_frame_num = frames_submitted;
|
static auto last_frame_num = frames_submitted;
|
||||||
static u32 seq_num{};
|
static u32 seq_num{};
|
||||||
if (last_frame_num == frames_submitted) {
|
if (last_frame_num && last_frame_num == frames_submitted) {
|
||||||
++seq_num;
|
++seq_num;
|
||||||
} else {
|
} else {
|
||||||
last_frame_num = frames_submitted;
|
last_frame_num = frames_submitted;
|
||||||
|
|
|
@ -199,7 +199,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
ASSERT(col_buf_id < NumColorBuffers);
|
ASSERT(col_buf_id < NumColorBuffers);
|
||||||
|
|
||||||
const auto nop_offset = header->type3.count;
|
const auto nop_offset = header->type3.count;
|
||||||
if (nop_offset == 0x0e) {
|
if (nop_offset == 0x0e || nop_offset == 0x0d) {
|
||||||
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
||||||
"NOP hint is missing in CB setup sequence");
|
"NOP hint is missing in CB setup sequence");
|
||||||
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "resource.h"
|
||||||
#include "video_core/amdgpu/pixel_format.h"
|
#include "video_core/amdgpu/pixel_format.h"
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
@ -622,7 +623,7 @@ struct Liverpool {
|
||||||
BitField<19, 1, u32> cmask_is_linear;
|
BitField<19, 1, u32> cmask_is_linear;
|
||||||
} info;
|
} info;
|
||||||
union {
|
union {
|
||||||
BitField<0, 5, u32> tile_mode_index;
|
BitField<0, 5, TilingMode> tile_mode_index;
|
||||||
BitField<5, 5, u32> fmask_tile_mode_index;
|
BitField<5, 5, u32> fmask_tile_mode_index;
|
||||||
BitField<12, 3, u32> num_samples_log2;
|
BitField<12, 3, u32> num_samples_log2;
|
||||||
BitField<15, 3, u32> num_fragments_log2;
|
BitField<15, 3, u32> num_fragments_log2;
|
||||||
|
@ -661,6 +662,22 @@ struct Liverpool {
|
||||||
return u64(cmask_base_address) << 8;
|
return u64(cmask_base_address) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] size_t GetSizeAligned() const {
|
||||||
|
const auto num_bytes_per_element = NumBits(info.format) / 8u;
|
||||||
|
const auto slice_size = (slice.tile_max + 1) * 64u;
|
||||||
|
const auto total_size = slice_size * (view.slice_max + 1) * num_bytes_per_element;
|
||||||
|
ASSERT(total_size > 0);
|
||||||
|
return total_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the tiling mode stored in the tile_mode_index field of the attribute register.
[[nodiscard]] TilingMode GetTilingMode() const {
    const TilingMode mode = attrib.tile_mode_index;
    return mode;
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool IsTiled() const {
|
||||||
|
return !info.linear_general;
|
||||||
|
}
|
||||||
|
|
||||||
NumberFormat NumFormat() const {
|
NumberFormat NumFormat() const {
|
||||||
// There is a small difference between T# and CB number types, account for it.
|
// There is a small difference between T# and CB number types, account for it.
|
||||||
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
|
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
|
||||||
|
@ -834,7 +851,9 @@ private:
|
||||||
static constexpr std::suspend_always final_suspend() noexcept {
|
static constexpr std::suspend_always final_suspend() noexcept {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
void unhandled_exception() {}
|
void unhandled_exception() {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
void return_void() {}
|
void return_void() {}
|
||||||
struct empty {};
|
struct empty {};
|
||||||
std::suspend_always yield_value(empty&&) {
|
std::suspend_always yield_value(empty&&) {
|
||||||
|
|
|
@ -40,17 +40,30 @@ std::string_view NameOf(NumberFormat fmt) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 NumComponents(DataFormat format) {
|
int NumComponents(DataFormat format) {
|
||||||
constexpr std::array numComponentsPerElement = {
|
constexpr std::array num_components_per_element = {
|
||||||
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
|
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
|
||||||
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,
|
2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,
|
||||||
-1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 1};
|
-1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 1};
|
||||||
|
|
||||||
const u32 index = static_cast<u32>(format);
|
const u32 index = static_cast<u32>(format);
|
||||||
if (index >= numComponentsPerElement.size()) {
|
if (index >= num_components_per_element.size()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return numComponentsPerElement[index];
|
return num_components_per_element[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
int NumBits(DataFormat format) {
|
||||||
|
const std::array num_bits_per_element = {
|
||||||
|
0, 8, 16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1, 16, 16, 16, 16, 32,
|
||||||
|
32, 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, 16, 32, 4, 8, 8, 4, 8, 8, 8,
|
||||||
|
-1, -1, 8, 8, 8, 8, 8, 8, 16, 16, 32, 32, 32, 64, 64, 8, 16, 1, 1};
|
||||||
|
|
||||||
|
const u32 index = static_cast<u32>(format);
|
||||||
|
if (index >= num_bits_per_element.size()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return num_bits_per_element[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
|
@ -63,7 +63,8 @@ enum class NumberFormat : u32 {
|
||||||
|
|
||||||
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
||||||
|
|
||||||
u32 NumComponents(DataFormat format);
|
int NumComponents(DataFormat format);
|
||||||
|
int NumBits(DataFormat format);
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
||||||
|
|
|
@ -85,6 +85,12 @@ constexpr std::string_view NameOf(ImageType type) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tiling modes recognised so far. The enumerator values are the raw tile mode indices
/// read from the hardware descriptors; indices without an enumerator are not named here.
enum class TilingMode : u32 {
    Display_Linear = 0x8u,     ///< The only mode Image::IsTiled() treats as not tiled.
    Display_MacroTiled = 0xAu, ///< Raw tile mode index 0xA.
    Texture_MicroTiled = 0xDu, ///< Raw tile mode index 0xD.
};
|
||||||
|
|
||||||
struct Image {
|
struct Image {
|
||||||
union {
|
union {
|
||||||
BitField<0, 38, u64> base_address;
|
BitField<0, 38, u64> base_address;
|
||||||
|
@ -122,7 +128,7 @@ struct Image {
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 Pitch() const {
|
u32 Pitch() const {
|
||||||
return pitch;
|
return pitch + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 NumLayers() const {
|
u32 NumLayers() const {
|
||||||
|
@ -140,6 +146,19 @@ struct Image {
|
||||||
NumberFormat GetNumberFmt() const noexcept {
|
NumberFormat GetNumberFmt() const noexcept {
|
||||||
return static_cast<NumberFormat>(num_format.Value());
|
return static_cast<NumberFormat>(num_format.Value());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reinterprets the raw tiling_index field of the descriptor as a TilingMode.
[[nodiscard]] TilingMode GetTilingMode() const {
    const auto raw_index = tiling_index.Value();
    return static_cast<TilingMode>(raw_index);
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool IsTiled() const {
|
||||||
|
return GetTilingMode() != TilingMode::Display_Linear;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] size_t GetSizeAligned() const {
|
||||||
|
// TODO: Derive this properly from tiling params
|
||||||
|
return (width + 1) * (height + 1) * NumComponents(GetDataFmt());
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// 8.2.7. Image Sampler [RDNA 2 Instruction Set Architecture]
|
// 8.2.7. Image Sampler [RDNA 2 Instruction Set Architecture]
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
# SPDX-FileCopyrightText: Copyright 2023 Citra Emulator Project
|
||||||
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
# Shader sources that are converted into C++ headers holding the source as a string.
set(SHADER_FILES
    detile_m8x1.comp
    detile_m8x4.comp
)

# Generated headers are written to the build tree.
set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
# Hand the include root back to the directory that added this one.
set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)

set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)

# One generation rule per shader: <name>.<ext> becomes <name>_<ext>.h
foreach(FILENAME IN ITEMS ${SHADER_FILES})
    string(REPLACE "." "_" SHADER_NAME ${FILENAME})
    set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
    set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
    add_custom_command(
        OUTPUT
            ${SOURCE_HEADER_FILE}
        COMMAND
            ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE}
        MAIN_DEPENDENCY
            ${SOURCE_FILE}
        DEPENDS
            ${INPUT_FILE}
            # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
        COMMENT
            "Generating ${SHADER_NAME}.h"
        # Pass the arguments to the generator exactly as written on every platform.
        VERBATIM
    )
    list(APPEND SHADER_HEADERS ${SOURCE_HEADER_FILE})
endforeach()

set(SHADER_SOURCES ${SHADER_FILES})
list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})

# Building this target generates every header; the shader sources are attached so that
# IDE generators list them.
add_custom_target(host_shaders
    DEPENDS
        ${SHADER_HEADERS}
    SOURCES
        ${SHADER_SOURCES}
)
|
|
@ -0,0 +1,36 @@
|
||||||
|
# SPDX-FileCopyrightText: 2020 yuzu Emulator Project
|
||||||
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
# Usage: cmake -P StringShaderHeader.cmake <shader source> <output header> <header template>
# In script mode CMAKE_ARGV0..2 hold the cmake executable, "-P" and this script,
# so the caller's arguments start at index 3.
set(SOURCE_FILE "${CMAKE_ARGV3}")
set(HEADER_FILE "${CMAKE_ARGV4}")
set(INPUT_FILE "${CMAKE_ARGV5}")

# Name of the generated constant: file name with dots replaced, upper-cased
# (e.g. detile_m8x1.comp -> DETILE_M8X1_COMP).
get_filename_component(CONTENTS_NAME "${SOURCE_FILE}" NAME)
string(REPLACE "." "_" CONTENTS_NAME "${CONTENTS_NAME}")
string(TOUPPER "${CONTENTS_NAME}" CONTENTS_NAME)

file(READ "${SOURCE_FILE}" line_contents)

# Replace double quotes with single quotes,
# as double quotes will be used to wrap the lines
string(REPLACE "\"" "'" line_contents "${line_contents}")

# CMake separates list elements with semicolons, but semicolons
# are used extensively in the shader code.
# Replace with a temporary marker, to be reverted later.
string(REPLACE ";" "{{SEMICOLON}}" line_contents "${line_contents}")

# Make every line an individual element in the CMake list.
string(REPLACE "\n" ";" line_contents "${line_contents}")

# Build the shader string, wrapping each line in double quotes.
set(CONTENTS "")
foreach(line IN LISTS line_contents)
    string(CONCAT CONTENTS "${CONTENTS}" "\"${line}\\n\"\n")
endforeach()

# Revert the original semicolons in the source.
string(REPLACE "{{SEMICOLON}}" ";" CONTENTS "${CONTENTS}")

# make_directory() is deprecated; file(MAKE_DIRECTORY) is its documented replacement.
get_filename_component(OUTPUT_DIR "${HEADER_FILE}" DIRECTORY)
file(MAKE_DIRECTORY "${OUTPUT_DIR}")
# Substitutes @CONTENTS_NAME@ and @CONTENTS@ in the template.
configure_file("${INPUT_FILE}" "${HEADER_FILE}" @ONLY)
|
|
@ -0,0 +1,48 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_KHR_shader_subgroup_shuffle : require
|
||||||
|
|
||||||
|
// NOTE: Current subgroup utilization is suboptimal on most GPUs, so
|
||||||
|
// it will be nice to process two tiles at once here.
|
||||||
|
layout (local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(std430, binding = 0) buffer input_buf {
|
||||||
|
uint in_data[];
|
||||||
|
};
|
||||||
|
layout(r8ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||||
|
|
||||||
|
layout(push_constant) uniform image_info {
|
||||||
|
uint pitch;
|
||||||
|
} info;
|
||||||
|
|
||||||
|
#define MICRO_TILE_DIM 8
|
||||||
|
#define TEXELS_PER_ELEMENT 4
|
||||||
|
|
||||||
|
void main() {
    uint lane = gl_LocalInvocationID.x;

    // Load one dword and fetch the dword held by the invocation whose subgroup index
    // differs in bit 0.
    uint own_dw = in_data[gl_GlobalInvocationID.x];
    uint peer_dw = subgroupShuffleXor(own_dw, 1);

    // Odd invocations combine the upper halfwords of the pair, even ones the lower halfwords.
    uint dst_tx;
    if ((lane & 1) == 1) {
        dst_tx = (own_dw & 0xffff0000) | (peer_dw >> 16);
    } else {
        dst_tx = (own_dw & 0x0000ffff) | (peer_dw << 16);
    }

    // Position of this invocation's texel group inside the micro tile.
    uint col = (lane >> 2) & 1;
    uint row = (lane % TEXELS_PER_ELEMENT) + TEXELS_PER_ELEMENT * (lane >> 3);

    // One workgroup handles one micro tile; tiles are laid out row by row along the pitch.
    uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
    uint tile_x = gl_WorkGroupID.x % tiles_per_pitch;
    uint tile_y = gl_WorkGroupID.x / tiles_per_pitch;

    ivec2 img_pos = ivec2(tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col,
                          tile_y * MICRO_TILE_DIM + row);

    // Write the four bytes of the combined dword as horizontally adjacent texels,
    // lowest byte first.
    #pragma unroll
    for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) {
        uint texel = (dst_tx >> (8 * ofs)) & 0xff;
        imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(texel));
    }
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(std430, binding = 0) buffer input_buf {
|
||||||
|
uint in_data[];
|
||||||
|
};
|
||||||
|
layout(rgba8ui, binding = 1) uniform writeonly uimage2D output_img;
|
||||||
|
|
||||||
|
layout(push_constant) uniform image_info {
|
||||||
|
uint pitch;
|
||||||
|
} info;
|
||||||
|
|
||||||
|
// Inverse morton LUT, small enough to fit into K$
// Each byte holds the in-tile position for one invocation: column in the high nibble,
// row in the low nibble. The table is never written, so it is declared const to let the
// compiler treat it as constant data instead of per-invocation mutable state.
const uint lut_8x4[16] = {
    0x11011000, 0x31213020,
    0x13031202, 0x33233222,
    0x51415040, 0x71617060,
    0x53435242, 0x73637262,

    0x15051404, 0x35253424,
    0x17071606, 0x37273626,
    0x55455444, 0x75657464,
    0x57475646, 0x77677666,
};
|
||||||
|
|
||||||
|
#define MICRO_TILE_DIM 8
|
||||||
|
#define TEXELS_PER_ELEMENT 1
|
||||||
|
|
||||||
|
void main() {
    uint lane = gl_LocalInvocationID.x;
    uint src_tx = in_data[gl_GlobalInvocationID.x];

    // Four invocations share one LUT entry; the low two bits of the invocation index
    // select which byte of that entry applies.
    uint packed_pos = lut_8x4[lane >> 2] >> ((lane & 3u) << 3);
    uint row = packed_pos & 0xfu;
    uint col = (packed_pos >> 4) & 0xfu;

    // One workgroup handles one micro tile; tiles are laid out row by row along the pitch.
    uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM)
    uint tile_x = gl_WorkGroupID.x % tiles_per_pitch;
    uint tile_y = gl_WorkGroupID.x / tiles_per_pitch;

    ivec2 img_pos = ivec2(tile_x * MICRO_TILE_DIM + TEXELS_PER_ELEMENT * col,
                          tile_y * MICRO_TILE_DIM + row);

    // Unpack the four bytes of the source dword into the four channels, lowest byte first.
    uvec4 dst_tx = uvec4(src_tx & 0xffu,
                         (src_tx >> 8) & 0xffu,
                         (src_tx >> 16) & 0xffu,
                         src_tx >> 24);
    imageStore(output_img, img_pos, dst_tx);
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2022 Citra Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
|
namespace HostShaders {
|
||||||
|
|
||||||
|
constexpr std::string_view @CONTENTS_NAME@ = {
|
||||||
|
@CONTENTS@
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace HostShaders
|
|
@ -150,7 +150,6 @@ bool Instance::CreateDevice() {
|
||||||
external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
||||||
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
|
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
|
||||||
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||||
index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
|
||||||
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||||
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
||||||
// The next two extensions are required to be available together in order to support write masks
|
// The next two extensions are required to be available together in order to support write masks
|
||||||
|
@ -219,9 +218,6 @@ bool Instance::CreateDevice() {
|
||||||
.customBorderColors = true,
|
.customBorderColors = true,
|
||||||
.customBorderColorWithoutFormat = true,
|
.customBorderColorWithoutFormat = true,
|
||||||
},
|
},
|
||||||
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{
|
|
||||||
.indexTypeUint8 = true,
|
|
||||||
},
|
|
||||||
vk::PhysicalDeviceColorWriteEnableFeaturesEXT{
|
vk::PhysicalDeviceColorWriteEnableFeaturesEXT{
|
||||||
.colorWriteEnable = true,
|
.colorWriteEnable = true,
|
||||||
},
|
},
|
||||||
|
@ -230,10 +226,6 @@ bool Instance::CreateDevice() {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!index_type_uint8) {
|
|
||||||
device_chain.unlink<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!color_write_en) {
|
if (!color_write_en) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
|
||||||
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
|
||||||
|
|
|
@ -81,11 +81,6 @@ public:
|
||||||
return custom_border_color;
|
return custom_border_color;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true when VK_EXT_index_type_uint8 is supported
|
|
||||||
bool IsIndexTypeUint8Supported() const {
|
|
||||||
return index_type_uint8;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true when VK_EXT_fragment_shader_interlock is supported
|
/// Returns true when VK_EXT_fragment_shader_interlock is supported
|
||||||
bool IsFragmentShaderInterlockSupported() const {
|
bool IsFragmentShaderInterlockSupported() const {
|
||||||
return fragment_shader_interlock;
|
return fragment_shader_interlock;
|
||||||
|
@ -216,7 +211,6 @@ private:
|
||||||
bool image_view_reinterpretation{true};
|
bool image_view_reinterpretation{true};
|
||||||
bool timeline_semaphores{};
|
bool timeline_semaphores{};
|
||||||
bool custom_border_color{};
|
bool custom_border_color{};
|
||||||
bool index_type_uint8{};
|
|
||||||
bool fragment_shader_interlock{};
|
bool fragment_shader_interlock{};
|
||||||
bool image_format_list{};
|
bool image_format_list{};
|
||||||
bool pipeline_creation_cache_control{};
|
bool pipeline_creation_cache_control{};
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/texture_cache/image.h"
|
#include "video_core/texture_cache/image.h"
|
||||||
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
|
||||||
#include <vk_mem_alloc.h>
|
#include <vk_mem_alloc.h>
|
||||||
|
|
||||||
|
@ -16,7 +17,7 @@ using namespace Vulkan;
|
||||||
using VideoOutFormat = Libraries::VideoOut::PixelFormat;
|
using VideoOutFormat = Libraries::VideoOut::PixelFormat;
|
||||||
using Libraries::VideoOut::TilingMode;
|
using Libraries::VideoOut::TilingMode;
|
||||||
|
|
||||||
[[nodiscard]] vk::Format ConvertPixelFormat(const VideoOutFormat format) {
|
static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case VideoOutFormat::A8R8G8B8Srgb:
|
case VideoOutFormat::A8R8G8B8Srgb:
|
||||||
return vk::Format::eB8G8R8A8Srgb;
|
return vk::Format::eB8G8R8A8Srgb;
|
||||||
|
@ -32,7 +33,7 @@ using Libraries::VideoOut::TilingMode;
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
|
static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
|
||||||
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
|
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
|
||||||
vk::ImageUsageFlagBits::eTransferDst |
|
vk::ImageUsageFlagBits::eTransferDst |
|
||||||
vk::ImageUsageFlagBits::eSampled;
|
vk::ImageUsageFlagBits::eSampled;
|
||||||
|
@ -46,7 +47,7 @@ using Libraries::VideoOut::TilingMode;
|
||||||
return usage;
|
return usage;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
|
static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case AmdGpu::ImageType::Color1D:
|
case AmdGpu::ImageType::Color1D:
|
||||||
return vk::ImageType::e1D;
|
return vk::ImageType::e1D;
|
||||||
|
@ -86,18 +87,19 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
|
||||||
|
|
||||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
|
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
|
||||||
is_tiled = true;
|
is_tiled = buffer.IsTiled();
|
||||||
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
|
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
|
||||||
type = vk::ImageType::e2D;
|
type = vk::ImageType::e2D;
|
||||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||||
size.depth = 1;
|
size.depth = 1;
|
||||||
pitch = size.width;
|
pitch = size.width;
|
||||||
guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1);
|
guest_size_bytes = buffer.GetSizeAligned();
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
||||||
is_tiled = false;
|
is_tiled = image.IsTiled();
|
||||||
|
tiling_mode = image.GetTilingMode();
|
||||||
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||||
type = ConvertImageType(image.type);
|
type = ConvertImageType(image.type);
|
||||||
size.width = image.width + 1;
|
size.width = image.width + 1;
|
||||||
|
@ -106,8 +108,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
||||||
pitch = image.Pitch();
|
pitch = image.Pitch();
|
||||||
resources.levels = image.NumLevels();
|
resources.levels = image.NumLevels();
|
||||||
resources.layers = image.NumLayers();
|
resources.layers = image.NumLayers();
|
||||||
// TODO: Derive this properly from tiling params
|
guest_size_bytes = image.GetSizeAligned();
|
||||||
guest_size_bytes = size.width * size.height * 4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
|
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
|
||||||
|
@ -151,6 +152,18 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
if (info.type == vk::ImageType::e3D) {
|
if (info.type == vk::ImageType::e3D) {
|
||||||
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
|
||||||
}
|
}
|
||||||
|
if (info.is_tiled) {
|
||||||
|
flags |= vk::ImageCreateFlagBits::eExtendedUsage;
|
||||||
|
if (false) { // IsBlockCodedFormat()
|
||||||
|
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info.usage = ImageUsageFlags(info.pixel_format);
|
||||||
|
if (info.is_tiled || info.is_storage) {
|
||||||
|
info.usage |= vk::ImageUsageFlagBits::eStorage;
|
||||||
|
}
|
||||||
|
|
||||||
const vk::ImageCreateInfo image_ci = {
|
const vk::ImageCreateInfo image_ci = {
|
||||||
.flags = flags,
|
.flags = flags,
|
||||||
.imageType = info.type,
|
.imageType = info.type,
|
||||||
|
@ -163,12 +176,20 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
.mipLevels = static_cast<u32>(info.resources.levels),
|
.mipLevels = static_cast<u32>(info.resources.levels),
|
||||||
.arrayLayers = static_cast<u32>(info.resources.layers),
|
.arrayLayers = static_cast<u32>(info.resources.layers),
|
||||||
.tiling = vk::ImageTiling::eOptimal,
|
.tiling = vk::ImageTiling::eOptimal,
|
||||||
.usage = ImageUsageFlags(info.pixel_format),
|
.usage = info.usage,
|
||||||
.initialLayout = vk::ImageLayout::eUndefined,
|
.initialLayout = vk::ImageLayout::eUndefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
image.Create(image_ci);
|
image.Create(image_ci);
|
||||||
|
|
||||||
|
// Create a special view for detiler
|
||||||
|
if (info.is_tiled) {
|
||||||
|
ImageViewInfo view_info;
|
||||||
|
view_info.format = DemoteImageFormatForDetiling(info.pixel_format);
|
||||||
|
view_info.used_for_detiling = true;
|
||||||
|
view_for_detiler.emplace(*instance, view_info, image);
|
||||||
|
}
|
||||||
|
|
||||||
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
|
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,8 @@
|
||||||
#include "video_core/texture_cache/image_view.h"
|
#include "video_core/texture_cache/image_view.h"
|
||||||
#include "video_core/texture_cache/types.h"
|
#include "video_core/texture_cache/types.h"
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
class Instance;
|
class Instance;
|
||||||
class Scheduler;
|
class Scheduler;
|
||||||
|
@ -39,12 +41,15 @@ struct ImageInfo {
|
||||||
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
||||||
|
|
||||||
bool is_tiled = false;
|
bool is_tiled = false;
|
||||||
|
bool is_storage = false;
|
||||||
vk::Format pixel_format = vk::Format::eUndefined;
|
vk::Format pixel_format = vk::Format::eUndefined;
|
||||||
vk::ImageType type = vk::ImageType::e1D;
|
vk::ImageType type = vk::ImageType::e1D;
|
||||||
|
vk::ImageUsageFlags usage;
|
||||||
SubresourceExtent resources;
|
SubresourceExtent resources;
|
||||||
Extent3D size{1, 1, 1};
|
Extent3D size{1, 1, 1};
|
||||||
u32 pitch = 0;
|
u32 pitch = 0;
|
||||||
u32 guest_size_bytes = 0;
|
u32 guest_size_bytes = 0;
|
||||||
|
AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct UniqueImage {
|
struct UniqueImage {
|
||||||
|
@ -114,6 +119,7 @@ struct Image {
|
||||||
VAddr cpu_addr_end = 0;
|
VAddr cpu_addr_end = 0;
|
||||||
std::vector<ImageViewInfo> image_view_infos;
|
std::vector<ImageViewInfo> image_view_infos;
|
||||||
std::vector<ImageViewId> image_view_ids;
|
std::vector<ImageViewId> image_view_ids;
|
||||||
|
std::optional<ImageView> view_for_detiler;
|
||||||
|
|
||||||
// Resource state tracking
|
// Resource state tracking
|
||||||
vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
|
vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
|
||||||
|
|
|
@ -58,10 +58,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept {
|
||||||
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, vk::Image image,
|
||||||
const ImageViewInfo& info_, vk::Image image)
|
std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
|
||||||
: info{info_} {
|
: info{info_} {
|
||||||
|
vk::ImageViewUsageCreateInfo usage_ci{};
|
||||||
|
if (usage_override) {
|
||||||
|
usage_ci.usage = usage_override.value();
|
||||||
|
}
|
||||||
|
|
||||||
const vk::ImageViewCreateInfo image_view_ci = {
|
const vk::ImageViewCreateInfo image_view_ci = {
|
||||||
|
.pNext = usage_override.has_value() ? &usage_ci : nullptr,
|
||||||
.image = image,
|
.image = image,
|
||||||
.viewType = info.type,
|
.viewType = info.type,
|
||||||
.format = info.format,
|
.format = info.format,
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
#include "video_core/texture_cache/types.h"
|
#include "video_core/texture_cache/types.h"
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
class Instance;
|
class Instance;
|
||||||
class Scheduler;
|
class Scheduler;
|
||||||
|
@ -22,13 +24,14 @@ struct ImageViewInfo {
|
||||||
vk::Format format = vk::Format::eR8G8B8A8Unorm;
|
vk::Format format = vk::Format::eR8G8B8A8Unorm;
|
||||||
SubresourceRange range;
|
SubresourceRange range;
|
||||||
vk::ComponentMapping mapping{};
|
vk::ComponentMapping mapping{};
|
||||||
|
bool used_for_detiling = false;
|
||||||
|
|
||||||
auto operator<=>(const ImageViewInfo&) const = default;
|
auto operator<=>(const ImageViewInfo&) const = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ImageView {
|
struct ImageView {
|
||||||
explicit ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, vk::Image image,
|
||||||
const ImageViewInfo& info, vk::Image image);
|
std::optional<vk::ImageUsageFlags> usage_override = {});
|
||||||
~ImageView();
|
~ImageView();
|
||||||
|
|
||||||
ImageView(const ImageView&) = delete;
|
ImageView(const ImageView&) = delete;
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "core/virtual_memory.h"
|
#include "core/virtual_memory.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/texture_cache/texture_cache.h"
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
#include "video_core/texture_cache/tile_manager.h"
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
@ -64,7 +65,8 @@ static constexpr u64 PageShift = 12;
|
||||||
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
|
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
|
||||||
: instance{instance_}, scheduler{scheduler_},
|
: instance{instance_}, scheduler{scheduler_},
|
||||||
staging{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, StreamBufferSize,
|
staging{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, StreamBufferSize,
|
||||||
Vulkan::BufferType::Upload} {
|
Vulkan::BufferType::Upload},
|
||||||
|
tile_manager{instance, scheduler} {
|
||||||
|
|
||||||
#ifndef _WIN64
|
#ifndef _WIN64
|
||||||
sigset_t signal_mask;
|
sigset_t signal_mask;
|
||||||
|
@ -91,7 +93,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
||||||
ASSERT(null_id.index == 0);
|
ASSERT(null_id.index == 0);
|
||||||
|
|
||||||
ImageViewInfo view_info;
|
ImageViewInfo view_info;
|
||||||
void(slot_image_views.insert(instance, scheduler, view_info, slot_images[null_id].image));
|
void(slot_image_views.insert(instance, view_info, slot_images[null_id].image));
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCache::~TextureCache() {
|
TextureCache::~TextureCache() {
|
||||||
|
@ -138,21 +140,33 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
|
||||||
return image;
|
return image;
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
|
ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& view_info) {
|
||||||
Image& image = FindImage(ImageInfo{desc}, desc.Address());
|
|
||||||
|
|
||||||
const ImageViewInfo view_info{desc};
|
|
||||||
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
|
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
|
||||||
return slot_image_views[view_id];
|
return slot_image_views[view_id];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// All tiled images are created with storage usage flag. This makes set of formats (e.g. sRGB)
|
||||||
|
// impossible to use. However, during view creation, if an image isn't used as storage and not a
|
||||||
|
// target for the detiler, we can temporary remove its storage bit.
|
||||||
|
std::optional<vk::ImageUsageFlags> usage_override;
|
||||||
|
if (!image.info.is_storage && !view_info.used_for_detiling) {
|
||||||
|
usage_override = image.info.usage & ~vk::ImageUsageFlagBits::eStorage;
|
||||||
|
}
|
||||||
|
|
||||||
const ImageViewId view_id =
|
const ImageViewId view_id =
|
||||||
slot_image_views.insert(instance, scheduler, view_info, image.image);
|
slot_image_views.insert(instance, view_info, image.image, usage_override);
|
||||||
image.image_view_infos.emplace_back(view_info);
|
image.image_view_infos.emplace_back(view_info);
|
||||||
image.image_view_ids.emplace_back(view_id);
|
image.image_view_ids.emplace_back(view_id);
|
||||||
return slot_image_views[view_id];
|
return slot_image_views[view_id];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Looks up (or creates) the image backing the descriptor at its address and
/// returns the view of that image described by the same descriptor.
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
    // The image is resolved first; the view description is built afterwards.
    Image& backing_image = FindImage(ImageInfo{desc}, desc.Address());
    return RegisterImageView(backing_image, ImageViewInfo{desc});
}
|
||||||
|
|
||||||
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint) {
|
const AmdGpu::Liverpool::CbDbExtent& hint) {
|
||||||
const ImageInfo info{buffer, hint};
|
const ImageInfo info{buffer, hint};
|
||||||
|
@ -160,15 +174,7 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
|
||||||
|
|
||||||
ImageViewInfo view_info;
|
ImageViewInfo view_info;
|
||||||
view_info.format = info.pixel_format;
|
view_info.format = info.pixel_format;
|
||||||
if (const ImageViewId view_id = image.FindView(view_info); view_id) {
|
return RegisterImageView(image, view_info);
|
||||||
return slot_image_views[view_id];
|
|
||||||
}
|
|
||||||
|
|
||||||
const ImageViewId view_id =
|
|
||||||
slot_image_views.insert(instance, scheduler, view_info, image.image);
|
|
||||||
image.image_view_infos.emplace_back(view_info);
|
|
||||||
image.image_view_ids.emplace_back(view_id);
|
|
||||||
return slot_image_views[view_id];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::RefreshImage(Image& image) {
|
void TextureCache::RefreshImage(Image& image) {
|
||||||
|
@ -176,52 +182,48 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
image.flags &= ~ImageFlagBits::CpuModified;
|
image.flags &= ~ImageFlagBits::CpuModified;
|
||||||
|
|
||||||
{
|
{
|
||||||
|
if (!tile_manager.TryDetile(image)) {
|
||||||
// Upload data to the staging buffer.
|
// Upload data to the staging buffer.
|
||||||
const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
|
const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
|
||||||
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
||||||
if (image.info.is_tiled) {
|
|
||||||
ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
|
|
||||||
Config::isNeoMode());
|
|
||||||
} else {
|
|
||||||
std::memcpy(data, image_data, image.info.guest_size_bytes);
|
std::memcpy(data, image_data, image.info.guest_size_bytes);
|
||||||
|
staging.Commit(image.info.guest_size_bytes);
|
||||||
|
|
||||||
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
|
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||||
|
|
||||||
|
// Copy to the image.
|
||||||
|
const vk::BufferImageCopy image_copy = {
|
||||||
|
.bufferOffset = offset,
|
||||||
|
.bufferRowLength = 0,
|
||||||
|
.bufferImageHeight = 0,
|
||||||
|
.imageSubresource{
|
||||||
|
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||||
|
.mipLevel = 0,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = 1,
|
||||||
|
},
|
||||||
|
.imageOffset = {0, 0, 0},
|
||||||
|
.imageExtent = {image.info.size.width, image.info.size.height, 1},
|
||||||
|
};
|
||||||
|
|
||||||
|
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
|
||||||
|
vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||||
}
|
}
|
||||||
staging.Commit(image.info.guest_size_bytes);
|
|
||||||
|
|
||||||
// Copy to the image.
|
|
||||||
const vk::BufferImageCopy image_copy = {
|
|
||||||
.bufferOffset = offset,
|
|
||||||
.bufferRowLength = 0,
|
|
||||||
.bufferImageHeight = 0,
|
|
||||||
.imageSubresource{
|
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
|
||||||
.mipLevel = 0,
|
|
||||||
.baseArrayLayer = 0,
|
|
||||||
.layerCount = 1,
|
|
||||||
},
|
|
||||||
.imageOffset = {0, 0, 0},
|
|
||||||
.imageExtent = {image.info.size.width, image.info.size.height, 1},
|
|
||||||
};
|
|
||||||
|
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
|
||||||
const vk::ImageSubresourceRange range = {
|
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
|
||||||
.baseMipLevel = 0,
|
|
||||||
.levelCount = 1,
|
|
||||||
.baseArrayLayer = 0,
|
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
|
||||||
};
|
|
||||||
|
|
||||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
|
||||||
|
|
||||||
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
|
|
||||||
vk::ImageLayout::eTransferDstOptimal, image_copy);
|
|
||||||
|
|
||||||
image.Transit(vk::ImageLayout::eGeneral,
|
image.Transit(vk::ImageLayout::eGeneral,
|
||||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const vk::ImageSubresourceRange range = {
|
||||||
|
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = 1,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
|
};
|
||||||
|
|
||||||
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
||||||
for (u32 l = 0; l < image.info.resources.layers; l++) {
|
for (u32 l = 0; l < image.info.resources.layers; l++) {
|
||||||
// Upload data to the staging buffer.
|
// Upload data to the staging buffer.
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "video_core/texture_cache/image_view.h"
|
#include "video_core/texture_cache/image_view.h"
|
||||||
#include "video_core/texture_cache/sampler.h"
|
#include "video_core/texture_cache/sampler.h"
|
||||||
#include "video_core/texture_cache/slot_vector.h"
|
#include "video_core/texture_cache/slot_vector.h"
|
||||||
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
|
||||||
namespace Core::Libraries::VideoOut {
|
namespace Core::Libraries::VideoOut {
|
||||||
struct BufferAttributeGroup;
|
struct BufferAttributeGroup;
|
||||||
|
@ -36,22 +37,24 @@ public:
|
||||||
void OnCpuWrite(VAddr address);
|
void OnCpuWrite(VAddr address);
|
||||||
|
|
||||||
/// Retrieves the image handle of the image with the provided attributes and address.
|
/// Retrieves the image handle of the image with the provided attributes and address.
|
||||||
Image& FindImage(const ImageInfo& info, VAddr cpu_address);
|
[[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address);
|
||||||
|
|
||||||
/// Retrieves an image view with the properties of the specified image descriptor.
|
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||||
ImageView& FindImageView(const AmdGpu::Image& image);
|
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image);
|
||||||
|
|
||||||
/// Retrieves the render target with specified properties
|
/// Retrieves the render target with specified properties
|
||||||
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint);
|
const AmdGpu::Liverpool::CbDbExtent& hint);
|
||||||
|
|
||||||
/// Reuploads image contents.
|
/// Reuploads image contents.
|
||||||
void RefreshImage(Image& image);
|
void RefreshImage(Image& image);
|
||||||
|
|
||||||
/// Retrieves the sampler that matches the provided S# descriptor.
|
/// Retrieves the sampler that matches the provided S# descriptor.
|
||||||
vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);
|
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
ImageView& RegisterImageView(Image& image, const ImageViewInfo& view_info);
|
||||||
|
|
||||||
/// Iterate over all page indices in a range
|
/// Iterate over all page indices in a range
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
|
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
|
||||||
|
@ -128,6 +131,7 @@ private:
|
||||||
const Vulkan::Instance& instance;
|
const Vulkan::Instance& instance;
|
||||||
Vulkan::Scheduler& scheduler;
|
Vulkan::Scheduler& scheduler;
|
||||||
Vulkan::StreamBuffer staging;
|
Vulkan::StreamBuffer staging;
|
||||||
|
TileManager tile_manager;
|
||||||
SlotVector<Image> slot_images;
|
SlotVector<Image> slot_images;
|
||||||
SlotVector<ImageView> slot_image_views;
|
SlotVector<ImageView> slot_image_views;
|
||||||
tsl::robin_map<u64, Sampler> samplers;
|
tsl::robin_map<u64, Sampler> samplers;
|
||||||
|
|
|
@ -1,10 +1,20 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <cstring>
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "common/assert.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||||
|
#include "video_core/texture_cache/image_view.h"
|
||||||
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
#include "video_core/texture_cache/tile_manager.h"
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
|
||||||
|
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
||||||
|
#include "video_core/host_shaders/detile_m8x4_comp.h"
|
||||||
|
|
||||||
|
#include <boost/container/static_vector.hpp>
|
||||||
|
#include <magic_enum.hpp>
|
||||||
|
#include <vulkan/vulkan_to_string.hpp>
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
static u32 IntLog2(u32 i) {
|
static u32 IntLog2(u32 i) {
|
||||||
|
@ -162,4 +172,188 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Maps an image format onto the unsigned-integer format used by the detiler.
/// Formats without a known mapping are logged as an error and returned unchanged.
vk::Format DemoteImageFormatForDetiling(vk::Format format) {
    const bool is_four_byte_color =
        format == vk::Format::eB8G8R8A8Srgb || format == vk::Format::eR8G8B8A8Unorm;
    if (is_four_byte_color) {
        return vk::Format::eR8G8B8A8Uint;
    }
    if (format == vk::Format::eR8Unorm) {
        return vk::Format::eR8Uint;
    }

    LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
    return format;
}
|
||||||
|
|
||||||
|
/// Picks the detiler pipeline matching the image's tiling mode and demoted format.
/// Returns nullptr when the image is not micro-tiled or no pipeline handles its format.
const DetilerContext* TileManager::GetDetiler(const Image& image) const {
    // Demotion runs before the tiling-mode check, so an unknown format is still
    // reported by DemoteImageFormatForDetiling even for unsupported tiling modes.
    const vk::Format demoted = DemoteImageFormatForDetiling(image.info.pixel_format);

    if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MicroTiled) {
        return nullptr;
    }
    if (demoted == vk::Format::eR8Uint) {
        return &detilers[DetilerType::Micro8x1];
    }
    if (demoted == vk::Format::eR8G8B8A8Uint) {
        return &detilers[DetilerType::Micro8x4];
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
/// Buffer usage bits requested for the tile manager's staging stream buffer.
static constexpr vk::BufferUsageFlags StagingFlags =
    vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eUniformBuffer |
    vk::BufferUsageFlagBits::eTransferDst;
|
||||||
|
|
||||||
|
/// Creates the staging buffer and builds one compute pipeline per DetilerType
/// from the embedded detile shaders.
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
    : instance{instance}, scheduler{scheduler}, staging{instance, scheduler, StagingFlags, 64_MB} {

    // Shader sources, indexed by DetilerType.
    static const std::array detiler_shaders{
        HostShaders::DETILE_M8X1_COMP,
        HostShaders::DETILE_M8X4_COMP,
    };

    // Descriptor interface shared by every detiler:
    // binding 0 is a storage buffer, binding 1 is a storage image.
    boost::container::static_vector<vk::DescriptorSetLayoutBinding, 2> bindings{
        {
            .binding = 0,
            .descriptorType = vk::DescriptorType::eStorageBuffer,
            .descriptorCount = 1,
            .stageFlags = vk::ShaderStageFlagBits::eCompute,
        },
        {
            .binding = 1,
            .descriptorType = vk::DescriptorType::eStorageImage,
            .descriptorCount = 1,
            .stageFlags = vk::ShaderStageFlagBits::eCompute,
        },
    };
    const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = static_cast<u32>(bindings.size()),
        .pBindings = bindings.data(),
    };

    // A single u32 push constant visible to the compute stage.
    const vk::PushConstantRange push_constants = {
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        .size = sizeof(u32),
    };

    for (u32 type = 0; type < DetilerType::Max; ++type) {
        DetilerContext& ctx = detilers[type];

        const auto shader_module = Vulkan::Compile(
            detiler_shaders[type], vk::ShaderStageFlagBits::eCompute, instance.GetDevice());

        // Label the shader module with the enumerator name for debugging tools.
        const auto module_name = magic_enum::enum_name(static_cast<DetilerType>(type));
        const vk::DebugUtilsObjectNameInfoEXT name_info = {
            .objectType = vk::ObjectType::eShaderModule,
            .objectHandle = std::bit_cast<u64>(shader_module),
            .pObjectName = module_name.data(),
        };
        instance.GetDevice().setDebugUtilsObjectNameEXT(name_info);

        // Created on the first pass only and reused by every pipeline layout.
        // NOTE(review): as a function-local static this handle outlives the
        // TileManager and is released during static destruction — confirm the
        // device is still valid at that point.
        static auto desc_layout =
            instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);

        const vk::DescriptorSetLayout set_layout = *desc_layout;
        const vk::PipelineLayoutCreateInfo layout_info = {
            .setLayoutCount = 1U,
            .pSetLayouts = &set_layout,
            .pushConstantRangeCount = 1,
            .pPushConstantRanges = &push_constants,
        };
        ctx.pl_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);

        const vk::PipelineShaderStageCreateInfo shader_ci = {
            .stage = vk::ShaderStageFlagBits::eCompute,
            .module = shader_module,
            .pName = "main",
        };
        const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
            .stage = shader_ci,
            .layout = *ctx.pl_layout,
        };
        auto pipeline_result = instance.GetDevice().createComputePipelineUnique(
            /*pipeline_cache*/ {}, compute_pipeline_ci);
        if (pipeline_result.result != vk::Result::eSuccess) {
            UNREACHABLE_MSG("Detiler pipeline creation failed!");
        } else {
            ctx.pl = std::move(pipeline_result.value);
        }

        // The pipeline keeps what it needs; the module can be released now.
        instance.GetDevice().destroyShaderModule(shader_module);
    }
}
|
||||||
|
|
||||||
|
// Defaulted: the staging buffer and the detiler contexts are cleaned up by their own destructors.
TileManager::~TileManager() = default;
|
||||||
|
|
||||||
|
/// Records a compute dispatch that detiles the guest contents of `image` into
/// its Vulkan image.
///
/// @param image Image to detile; it must already have `view_for_detiler` set.
/// @return false when the image is not tiled or no detiler pipeline matches its
///         format/tiling mode (nothing is recorded in that case); true once the
///         detiling commands have been recorded.
bool TileManager::TryDetile(Image& image) {
    if (!image.info.is_tiled) {
        return false;
    }

    const auto* detiler = GetDetiler(image);
    if (!detiler) {
        LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} {}",
                  vk::to_string(image.info.pixel_format), static_cast<u32>(image.info.tiling_mode));
        return false;
    }

    // Copy the still-tiled guest data into the staging buffer; the shader reads
    // it from there through a storage buffer descriptor.
    // NOTE(review): the staging offset is aligned to 4 bytes only — confirm this
    // satisfies the device's minStorageBufferOffsetAlignment.
    const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
    const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
    std::memcpy(data, image_data, image.info.guest_size_bytes);
    staging.Commit(image.info.guest_size_bytes);

    auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl);

    image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);

    const vk::DescriptorBufferInfo input_buffer_info{
        .buffer = staging.Handle(),
        .offset = offset,
        .range = image.info.guest_size_bytes,
    };

    ASSERT(image.view_for_detiler.has_value());
    const vk::DescriptorImageInfo output_image_info{
        .imageView = *image.view_for_detiler->image_view,
        .imageLayout = image.layout,
    };

    // Fixed-size list of push descriptor writes; kept in a std::array so no heap
    // allocation happens on every detile.
    const std::array set_writes{
        vk::WriteDescriptorSet{
            .dstSet = VK_NULL_HANDLE,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = vk::DescriptorType::eStorageBuffer,
            .pBufferInfo = &input_buffer_info,
        },
        vk::WriteDescriptorSet{
            .dstSet = VK_NULL_HANDLE,
            .dstBinding = 1,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = vk::DescriptorType::eStorageImage,
            .pImageInfo = &output_image_info,
        },
    };
    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *detiler->pl_layout, 0,
                                set_writes);

    // The image pitch is the only push constant handed to the shader.
    cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u,
                         sizeof(image.info.pitch), &image.info.pitch);

    // Group count is the texel count divided by 64. The integer division
    // truncates (it does not round up), so a trailing remainder of fewer than
    // 64 texels gets no workgroup.
    // NOTE(review): confirm width * height is always a multiple of 64 for tiled images.
    cmdbuf.dispatch((image.info.size.width * image.info.size.height) / 64, 1, 1);

    return true;
}
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -4,10 +4,46 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
#include "video_core/texture_cache/image.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
|
class TextureCache;
|
||||||
|
|
||||||
/// Converts tiled texture data to linear format.
|
/// Converts tiled texture data to linear format.
|
||||||
void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool neo);
|
void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool neo);
|
||||||
|
|
||||||
|
/// Converts image format to the one used internally by detiler.
|
||||||
|
vk::Format DemoteImageFormatForDetiling(vk::Format format);
|
||||||
|
|
||||||
|
/// Identifies a detiler pipeline. The values double as indices into the
/// detiler array, so this stays an unscoped enum.
enum DetilerType : u32 {
    Micro8x1 = 0, ///< Selected for micro-tiled images demoted to R8Uint.
    Micro8x4 = 1, ///< Selected for micro-tiled images demoted to R8G8B8A8Uint.

    Max = 2, ///< Number of detiler types; not a valid detiler itself.
};
|
||||||
|
|
||||||
|
struct DetilerContext {
|
||||||
|
vk::UniquePipeline pl;
|
||||||
|
vk::UniquePipelineLayout pl_layout;
|
||||||
|
};
|
||||||
|
|
||||||
|
class TileManager {
|
||||||
|
public:
|
||||||
|
TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
||||||
|
~TileManager();
|
||||||
|
|
||||||
|
bool TryDetile(Image& image);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const DetilerContext* GetDetiler(const Image& image) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Vulkan::Instance& instance;
|
||||||
|
Vulkan::Scheduler& scheduler;
|
||||||
|
Vulkan::StreamBuffer staging;
|
||||||
|
std::array<DetilerContext, DetilerType::Max> detilers;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
Loading…
Reference in New Issue