From f2916747901e8bb976e66ca31977e2b2f61fb673 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 7 Jun 2024 02:14:52 +0300 Subject: [PATCH] amdgpu: Add freegnm detiler --- CMakeLists.txt | 12 + src/common/io_file.cpp | 4 +- src/core/libraries/kernel/file_system.cpp | 8 +- .../libraries/kernel/thread_management.cpp | 5 +- .../backend/spirv/emit_spirv_image.cpp | 9 +- .../frontend/translate/scalar_memory.cpp | 12 +- .../frontend/translate/vector_alu.cpp | 3 +- .../frontend/translate/vector_memory.cpp | 2 +- src/video_core/amdgpu/gpuaddr/dataformat.cpp | 387 +++++ src/video_core/amdgpu/gpuaddr/dataformat.h | 409 ++++++ src/video_core/amdgpu/gpuaddr/element.cpp | 82 ++ src/video_core/amdgpu/gpuaddr/error.cpp | 25 + src/video_core/amdgpu/gpuaddr/error.h | 18 + src/video_core/amdgpu/gpuaddr/gpuaddr.h | 74 + .../amdgpu/gpuaddr/gpuaddr_private.h | 145 ++ src/video_core/amdgpu/gpuaddr/surface.cpp | 1289 +++++++++++++++++ src/video_core/amdgpu/gpuaddr/surfgen.cpp | 203 +++ src/video_core/amdgpu/gpuaddr/tilemodes.cpp | 815 +++++++++++ src/video_core/amdgpu/gpuaddr/tiler.cpp | 1287 ++++++++++++++++ src/video_core/amdgpu/gpuaddr/types.h | 375 +++++ src/video_core/amdgpu/liverpool.h | 4 +- src/video_core/amdgpu/resource.h | 40 +- .../renderer_vulkan/liverpool_to_vk.cpp | 5 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_instance.cpp | 6 +- src/video_core/renderer_vulkan/vk_instance.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- .../renderer_vulkan/vk_rasterizer.cpp | 9 +- src/video_core/texture_cache/image.cpp | 77 +- src/video_core/texture_cache/image.h | 2 + src/video_core/texture_cache/image_view.cpp | 2 +- .../texture_cache/texture_cache.cpp | 109 +- src/video_core/texture_cache/texture_cache.h | 3 +- 33 files changed, 5293 insertions(+), 135 deletions(-) create mode 100644 src/video_core/amdgpu/gpuaddr/dataformat.cpp create mode 100644 
src/video_core/amdgpu/gpuaddr/dataformat.h create mode 100644 src/video_core/amdgpu/gpuaddr/element.cpp create mode 100644 src/video_core/amdgpu/gpuaddr/error.cpp create mode 100644 src/video_core/amdgpu/gpuaddr/error.h create mode 100644 src/video_core/amdgpu/gpuaddr/gpuaddr.h create mode 100644 src/video_core/amdgpu/gpuaddr/gpuaddr_private.h create mode 100644 src/video_core/amdgpu/gpuaddr/surface.cpp create mode 100644 src/video_core/amdgpu/gpuaddr/surfgen.cpp create mode 100644 src/video_core/amdgpu/gpuaddr/tilemodes.cpp create mode 100644 src/video_core/amdgpu/gpuaddr/tiler.cpp create mode 100644 src/video_core/amdgpu/gpuaddr/types.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ba180f89..465e667f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -377,6 +377,18 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/pm4_cmds.h src/video_core/amdgpu/pm4_opcodes.h src/video_core/amdgpu/resource.h + src/video_core/amdgpu/gpuaddr/dataformat.cpp + src/video_core/amdgpu/gpuaddr/dataformat.h + src/video_core/amdgpu/gpuaddr/element.cpp + src/video_core/amdgpu/gpuaddr/error.cpp + src/video_core/amdgpu/gpuaddr/error.h + src/video_core/amdgpu/gpuaddr/gpuaddr.h + src/video_core/amdgpu/gpuaddr/gpuaddr_private.h + src/video_core/amdgpu/gpuaddr/surface.cpp + src/video_core/amdgpu/gpuaddr/surfgen.cpp + src/video_core/amdgpu/gpuaddr/tilemodes.cpp + src/video_core/amdgpu/gpuaddr/tiler.cpp + src/video_core/amdgpu/gpuaddr/types.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.h src/video_core/renderer_vulkan/renderer_vulkan.cpp diff --git a/src/common/io_file.cpp b/src/common/io_file.cpp index 7a441738..43ae33ad 100644 --- a/src/common/io_file.cpp +++ b/src/common/io_file.cpp @@ -184,8 +184,8 @@ void IOFile::Open(const fs::path& path, FileAccessMode mode, FileType type, File if (!IsOpen()) { const auto ec = std::error_code{errno, std::generic_category()}; - //LOG_ERROR(Common_Filesystem, "Failed to open 
the file at path={}, ec_message={}", - // PathToUTF8String(file_path), ec.message()); + // LOG_ERROR(Common_Filesystem, "Failed to open the file at path={}, ec_message={}", + // PathToUTF8String(file_path), ec.message()); } } diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index f74514a2..cb3ea78b 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -12,7 +12,7 @@ namespace Libraries::Kernel { int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { - //LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode); + // LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode); auto* h = Common::Singleton::Instance(); auto* mnt = Common::Singleton::Instance(); @@ -170,7 +170,7 @@ int PS4_SYSV_ABI sceKernelMkdir(const char* path, u16 mode) { } int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) { - //LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path); + // LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path); auto* mnt = Common::Singleton::Instance(); std::string path_name = mnt->GetHostFile(path); memset(sb, 0, sizeof(OrbisKernelStat)); @@ -199,13 +199,13 @@ int PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { int result = sceKernelStat(path, sb); if (result < 0) { return result; - //UNREACHABLE(); // TODO + // UNREACHABLE(); // TODO } return ORBIS_OK; } int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) { - //LOG_INFO(Lib_Kernel, "path = {}", path); + // LOG_INFO(Lib_Kernel, "path = {}", path); auto* mnt = Common::Singleton::Instance(); std::string path_name = mnt->GetHostFile(path); if (!std::filesystem::exists(path_name)) { diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index dfa148f5..e683c788 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ 
-393,7 +393,7 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr); if (name != nullptr) { - //LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result); + // LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result); } switch (result) { @@ -418,7 +418,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) { int result = pthread_mutex_destroy(&(*mutex)->pth_mutex); - //LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); + // LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); delete *mutex; *mutex = nullptr; @@ -1037,7 +1037,6 @@ void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) { return linker->TlsGetAddr(index->ti_module, index->ti_offset); } - int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) { if (cond == nullptr) { return SCE_KERNEL_ERROR_EINVAL; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index bb3ad2d1..487892b5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -12,14 +12,17 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - const auto info = inst->Flags(); return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords); } Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id offset) { - // TODO - return EmitImageSampleImplicitLod(ctx, inst, handle, coords, bias_lc, offset); + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id image = ctx.OpLoad(texture.image_type, texture.id); + const Id 
sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); + const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); + return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, + spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp index 7338c4f1..2c9718c4 100644 --- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -22,9 +22,15 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { const auto& smrd = inst.control.smrd; const IR::ScalarReg sbase{inst.src[0].code * 2}; const IR::Value vsharp = ir.GetScalarReg(sbase); - const IR::U32 dword_offset = - smrd.imm ? ir.Imm32(smrd.offset) : (smrd.offset == SQ_SRC_LITERAL ? ir.Imm32(inst.src[1].code) - : ir.GetScalarReg(IR::ScalarReg(smrd.offset))); + const IR::U32 dword_offset = [&] -> IR::U32 { + if (smrd.imm) { + return ir.Imm32(smrd.offset); + } + if (smrd.offset == SQ_SRC_LITERAL) { + return ir.Imm32(inst.src[1].code); + } + return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2)); + }(); IR::ScalarReg dst_reg{inst.dst[0].code}; for (u32 i = 0; i < num_dwords; i++) { const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i)); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7222c5fc..0a3ec92e 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -25,8 +25,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { } void Translator::V_MUL_F32(const GcnInst& inst) { - const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, 
ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); + SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); } void Translator::V_CNDMASK_B32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 909217d7..e101cffa 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -31,7 +31,7 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { void Translator::IMAGE_SAMPLE(const GcnInst& inst) { const auto& mimg = inst.control.mimg; - //ASSERT(!mimg.da); + // ASSERT(!mimg.da); IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg dest_reg{inst.dst[0].code}; diff --git a/src/video_core/amdgpu/gpuaddr/dataformat.cpp b/src/video_core/amdgpu/gpuaddr/dataformat.cpp new file mode 100644 index 00000000..2fb0d32a --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/dataformat.cpp @@ -0,0 +1,387 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#include "common/assert.h" +#include "video_core/amdgpu/gpuaddr/dataformat.h" + +GnmDataFormat gnmDfInitFromFmask(uint32_t numsamples, uint32_t numfrags) { + GnmDataFormat res = { + .surfacefmt = GNM_IMG_DATA_FORMAT_INVALID, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_X, + .chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_CONSTANT1, + }; + + switch (numsamples) { + case 1: + // invalid + break; + case 2: + switch (numfrags) { + case 1: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK8_S2_F1; + break; + case 2: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK8_S2_F2; + break; + case 4: + case 8: + // invalid + break; + default: + abort(); + } + break; + case 4: + switch (numfrags) { + case 1: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK8_S4_F1; + break; + case 2: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK8_S4_F2; + break; + case 
4: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK8_S4_F4; + break; + case 8: + // invalid + break; + default: + abort(); + } + break; + case 8: + switch (numfrags) { + case 1: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK8_S8_F1; + break; + case 2: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK16_S8_F2; + break; + case 4: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK32_S8_F4; + break; + case 8: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK32_S8_F8; + break; + default: + abort(); + } + break; + case 16: + switch (numfrags) { + case 1: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK16_S16_F1; + break; + case 2: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK32_S16_F2; + break; + case 4: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK64_S16_F4; + break; + case 8: + res.surfacefmt = GNM_IMG_DATA_FORMAT_FMASK64_S16_F8; + break; + default: + abort(); + } + break; + default: + UNREACHABLE(); + } + + if (numsamples == 16) { + res.chany = GNM_CHAN_Y; + res.chanz = GNM_CHAN_CONSTANT1; + } + + return res; +} + +GnmDataFormat gnmDfInitFromZ(GnmZFormat zfmt) { + GnmImageFormat surfmt = GNM_IMG_DATA_FORMAT_INVALID; + GnmImgNumFormat chantype = GNM_IMG_NUM_FORMAT_UNORM; + switch (zfmt) { + case GNM_Z_INVALID: + default: + // surfmt = GNM_IMG_DATA_FORMAT_INVALID; + // chantype = GNM_IMG_NUM_FORMAT_UNORM; + break; + case GNM_Z_16: + surfmt = GNM_IMG_DATA_FORMAT_16; + // chantype = GNM_IMG_NUM_FORMAT_UNORM; + break; + case GNM_Z_32_FLOAT: + surfmt = GNM_IMG_DATA_FORMAT_32; + chantype = GNM_IMG_NUM_FORMAT_FLOAT; + break; + } + + GnmDataFormat res = { + .surfacefmt = surfmt, + .chantype = chantype, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_CONSTANT0, + .chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_CONSTANT1, + }; + return res; +} + +uint32_t gnmDfGetNumComponents(const GnmDataFormat datafmt) { + switch (datafmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_INVALID: + return 0; + case GNM_IMG_DATA_FORMAT_8: + case GNM_IMG_DATA_FORMAT_16: + case GNM_IMG_DATA_FORMAT_32: + case GNM_IMG_DATA_FORMAT_BC4: + 
case GNM_IMG_DATA_FORMAT_1: + case GNM_IMG_DATA_FORMAT_1_REVERSED: + return 1; + case GNM_IMG_DATA_FORMAT_8_8: + case GNM_IMG_DATA_FORMAT_16_16: + case GNM_IMG_DATA_FORMAT_32_32: + case GNM_IMG_DATA_FORMAT_8_24: + case GNM_IMG_DATA_FORMAT_24_8: + case GNM_IMG_DATA_FORMAT_X24_8_32: + case GNM_IMG_DATA_FORMAT_BC5: + case GNM_IMG_DATA_FORMAT_FMASK8_S2_F1: + case GNM_IMG_DATA_FORMAT_FMASK8_S4_F1: + case GNM_IMG_DATA_FORMAT_FMASK8_S8_F1: + case GNM_IMG_DATA_FORMAT_FMASK8_S2_F2: + case GNM_IMG_DATA_FORMAT_FMASK8_S4_F2: + case GNM_IMG_DATA_FORMAT_FMASK8_S4_F4: + case GNM_IMG_DATA_FORMAT_FMASK16_S16_F1: + case GNM_IMG_DATA_FORMAT_FMASK16_S8_F2: + case GNM_IMG_DATA_FORMAT_FMASK32_S16_F2: + case GNM_IMG_DATA_FORMAT_FMASK32_S8_F4: + case GNM_IMG_DATA_FORMAT_FMASK32_S8_F8: + case GNM_IMG_DATA_FORMAT_FMASK64_S16_F4: + case GNM_IMG_DATA_FORMAT_FMASK64_S16_F8: + case GNM_IMG_DATA_FORMAT_4_4: + return 2; + case GNM_IMG_DATA_FORMAT_10_11_11: + case GNM_IMG_DATA_FORMAT_11_11_10: + case GNM_IMG_DATA_FORMAT_32_32_32: + case GNM_IMG_DATA_FORMAT_5_6_5: + case GNM_IMG_DATA_FORMAT_GB_GR: + case GNM_IMG_DATA_FORMAT_BG_RG: + case GNM_IMG_DATA_FORMAT_5_9_9_9: + case GNM_IMG_DATA_FORMAT_BC6: + case GNM_IMG_DATA_FORMAT_6_5_5: + return 3; + case GNM_IMG_DATA_FORMAT_10_10_10_2: + case GNM_IMG_DATA_FORMAT_2_10_10_10: + case GNM_IMG_DATA_FORMAT_8_8_8_8: + case GNM_IMG_DATA_FORMAT_16_16_16_16: + case GNM_IMG_DATA_FORMAT_32_32_32_32: + case GNM_IMG_DATA_FORMAT_1_5_5_5: + case GNM_IMG_DATA_FORMAT_5_5_5_1: + case GNM_IMG_DATA_FORMAT_4_4_4_4: + case GNM_IMG_DATA_FORMAT_BC1: + case GNM_IMG_DATA_FORMAT_BC2: + case GNM_IMG_DATA_FORMAT_BC3: + case GNM_IMG_DATA_FORMAT_BC7: + return 4; + default: + UNREACHABLE(); + } +} + +uint32_t gnmDfGetBitsPerElement(const GnmDataFormat datafmt) { + switch (datafmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_INVALID: + return 0; + case GNM_IMG_DATA_FORMAT_8: + return 8; + case GNM_IMG_DATA_FORMAT_16: + case GNM_IMG_DATA_FORMAT_8_8: + return 16; + case 
GNM_IMG_DATA_FORMAT_32: + case GNM_IMG_DATA_FORMAT_16_16: + case GNM_IMG_DATA_FORMAT_10_11_11: + case GNM_IMG_DATA_FORMAT_11_11_10: + case GNM_IMG_DATA_FORMAT_10_10_10_2: + case GNM_IMG_DATA_FORMAT_2_10_10_10: + case GNM_IMG_DATA_FORMAT_8_8_8_8: + return 32; + case GNM_IMG_DATA_FORMAT_32_32: + case GNM_IMG_DATA_FORMAT_16_16_16_16: + return 64; + case GNM_IMG_DATA_FORMAT_32_32_32: + return 96; + case GNM_IMG_DATA_FORMAT_32_32_32_32: + return 128; + case GNM_IMG_DATA_FORMAT_5_6_5: + case GNM_IMG_DATA_FORMAT_1_5_5_5: + case GNM_IMG_DATA_FORMAT_5_5_5_1: + case GNM_IMG_DATA_FORMAT_4_4_4_4: + return 16; + case GNM_IMG_DATA_FORMAT_8_24: + case GNM_IMG_DATA_FORMAT_24_8: + return 32; + case GNM_IMG_DATA_FORMAT_X24_8_32: + return 64; + case GNM_IMG_DATA_FORMAT_GB_GR: + case GNM_IMG_DATA_FORMAT_BG_RG: + return 16; + case GNM_IMG_DATA_FORMAT_5_9_9_9: + return 32; + case GNM_IMG_DATA_FORMAT_BC1: + return 4; + case GNM_IMG_DATA_FORMAT_BC2: + case GNM_IMG_DATA_FORMAT_BC3: + return 8; + case GNM_IMG_DATA_FORMAT_BC4: + return 4; + case GNM_IMG_DATA_FORMAT_BC5: + case GNM_IMG_DATA_FORMAT_BC6: + case GNM_IMG_DATA_FORMAT_BC7: + return 8; + case GNM_IMG_DATA_FORMAT_FMASK8_S2_F1: + case GNM_IMG_DATA_FORMAT_FMASK8_S4_F1: + case GNM_IMG_DATA_FORMAT_FMASK8_S8_F1: + case GNM_IMG_DATA_FORMAT_FMASK8_S2_F2: + case GNM_IMG_DATA_FORMAT_FMASK8_S4_F2: + case GNM_IMG_DATA_FORMAT_FMASK8_S4_F4: + return 8; + case GNM_IMG_DATA_FORMAT_FMASK16_S16_F1: + case GNM_IMG_DATA_FORMAT_FMASK16_S8_F2: + return 16; + case GNM_IMG_DATA_FORMAT_FMASK32_S16_F2: + case GNM_IMG_DATA_FORMAT_FMASK32_S8_F4: + case GNM_IMG_DATA_FORMAT_FMASK32_S8_F8: + return 32; + case GNM_IMG_DATA_FORMAT_FMASK64_S16_F4: + case GNM_IMG_DATA_FORMAT_FMASK64_S16_F8: + return 64; + case GNM_IMG_DATA_FORMAT_4_4: + return 8; + case GNM_IMG_DATA_FORMAT_6_5_5: + return 16; + case GNM_IMG_DATA_FORMAT_1: + case GNM_IMG_DATA_FORMAT_1_REVERSED: + return 1; + default: + UNREACHABLE(); + } +} + +bool gnmDfGetRtChannelType(const GnmDataFormat datafmt, 
GnmSurfaceNumber* out) {
+    switch (datafmt.chantype) {
+    case GNM_IMG_NUM_FORMAT_UNORM:
+        *out = GNM_NUMBER_UNORM;
+        break;
+    case GNM_IMG_NUM_FORMAT_SNORM:
+        *out = GNM_NUMBER_SNORM;
+        break;
+    case GNM_IMG_NUM_FORMAT_UINT:
+        *out = GNM_NUMBER_UINT;
+        break;
+    case GNM_IMG_NUM_FORMAT_SINT:
+        *out = GNM_NUMBER_SINT;
+        break;
+    case GNM_IMG_NUM_FORMAT_FLOAT:
+        *out = GNM_NUMBER_FLOAT;
+        break;
+    case GNM_IMG_NUM_FORMAT_SRGB:
+        *out = GNM_NUMBER_SRGB;
+        break;
+    default: // no mapping for this chantype; *out is left unmodified
+        return false;
+    }
+    return true;
+}
+
+bool gnmDfGetRtChannelOrder(const GnmDataFormat datafmt, GnmSurfaceSwap* out) {
+    const uint32_t numcomps = gnmDfGetNumComponents(datafmt);
+    const GnmChannel cx = datafmt.chanx;
+    const GnmChannel cy = datafmt.chany;
+    const GnmChannel cz = datafmt.chanz;
+    const GnmChannel cw = datafmt.chanw;
+
+    if (numcomps == 1) { // the first slot (x, y, z, w) that selects X picks the swap
+        if (cx == GNM_CHAN_X) {
+            *out = GNM_SWAP_STD;
+            return true;
+        } else if (cy == GNM_CHAN_X) {
+            *out = GNM_SWAP_ALT;
+            return true;
+        } else if (cz == GNM_CHAN_X) {
+            *out = GNM_SWAP_STD_REV;
+            return true;
+        } else if (cw == GNM_CHAN_X) {
+            *out = GNM_SWAP_ALT_REV;
+            return true;
+        }
+    } else if (numcomps == 2) { // *_REV: X/Y reversed; ALT*: second channel in the w slot
+        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y) {
+            *out = GNM_SWAP_STD;
+            return true;
+        } else if (cx == GNM_CHAN_X && cw == GNM_CHAN_Y) {
+            *out = GNM_SWAP_ALT;
+            return true;
+        } else if (cx == GNM_CHAN_Y && cy == GNM_CHAN_X) {
+            *out = GNM_SWAP_STD_REV;
+            return true;
+        } else if (cx == GNM_CHAN_Y && cw == GNM_CHAN_X) {
+            *out = GNM_SWAP_ALT_REV;
+            return true;
+        }
+    } else if (numcomps == 3) {
+        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cz == GNM_CHAN_Z) {
+            *out = GNM_SWAP_STD;
+            return true;
+        } else if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cw == GNM_CHAN_Z) {
+            *out = GNM_SWAP_ALT;
+            return true;
+        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cz == GNM_CHAN_X) {
+            *out = GNM_SWAP_STD_REV;
+            return true;
+        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cw == GNM_CHAN_X) {
+            *out = GNM_SWAP_ALT_REV;
+            return true;
+ } + } else if (numcomps == 4) { + if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cz == GNM_CHAN_Z && cw == GNM_CHAN_W) { + *out = GNM_SWAP_STD; + return true; + } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cz == GNM_CHAN_X && cw == GNM_CHAN_W) { + *out = GNM_SWAP_ALT; + return true; + } else if (cx == GNM_CHAN_W && cy == GNM_CHAN_Z && cz == GNM_CHAN_Y && cw == GNM_CHAN_X) { + *out = GNM_SWAP_STD_REV; + return true; + } else if (cx == GNM_CHAN_Y && cy == GNM_CHAN_Z && cz == GNM_CHAN_W && cw == GNM_CHAN_X) { + *out = GNM_SWAP_ALT_REV; + return true; + } + } + + return false; +} + +GnmZFormat gnmDfGetZFormat(const GnmDataFormat datafmt) { + switch (datafmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_16: + return GNM_Z_16; + case GNM_IMG_DATA_FORMAT_24_8: + return GNM_Z_24; + case GNM_IMG_DATA_FORMAT_32: + return GNM_Z_32_FLOAT; + default: + return GNM_Z_INVALID; + } +} + +GnmStencilFormat gnmDfGetStencilFormat(const GnmDataFormat datafmt) { + switch (datafmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_8: + return GNM_STENCIL_8; + default: + return GNM_STENCIL_INVALID; + } +} diff --git a/src/video_core/amdgpu/gpuaddr/dataformat.h b/src/video_core/amdgpu/gpuaddr/dataformat.h new file mode 100644 index 00000000..4e820382 --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/dataformat.h @@ -0,0 +1,409 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#pragma once + +#include "common/types.h" + +enum GnmSurfaceNumber { + GNM_NUMBER_UNORM = 0x0, + GNM_NUMBER_SNORM = 0x1, + GNM_NUMBER_UINT = 0x4, + GNM_NUMBER_SINT = 0x5, + GNM_NUMBER_SRGB = 0x6, + GNM_NUMBER_FLOAT = 0x7, +}; + +enum GnmImageFormat { + GNM_IMG_DATA_FORMAT_INVALID = 0x0, + GNM_IMG_DATA_FORMAT_8 = 0x1, + GNM_IMG_DATA_FORMAT_16 = 0x2, + GNM_IMG_DATA_FORMAT_8_8 = 0x3, + GNM_IMG_DATA_FORMAT_32 = 0x4, + GNM_IMG_DATA_FORMAT_16_16 = 0x5, + GNM_IMG_DATA_FORMAT_10_11_11 = 0x6, + GNM_IMG_DATA_FORMAT_11_11_10 = 0x7, + GNM_IMG_DATA_FORMAT_10_10_10_2 = 0x8, + 
GNM_IMG_DATA_FORMAT_2_10_10_10 = 0x9, + GNM_IMG_DATA_FORMAT_8_8_8_8 = 0xa, + GNM_IMG_DATA_FORMAT_32_32 = 0xb, + GNM_IMG_DATA_FORMAT_16_16_16_16 = 0xc, + GNM_IMG_DATA_FORMAT_32_32_32 = 0xd, + GNM_IMG_DATA_FORMAT_32_32_32_32 = 0xe, + GNM_IMG_DATA_FORMAT_5_6_5 = 0x10, + GNM_IMG_DATA_FORMAT_1_5_5_5 = 0x11, + GNM_IMG_DATA_FORMAT_5_5_5_1 = 0x12, + GNM_IMG_DATA_FORMAT_4_4_4_4 = 0x13, + GNM_IMG_DATA_FORMAT_8_24 = 0x14, + GNM_IMG_DATA_FORMAT_24_8 = 0x15, + GNM_IMG_DATA_FORMAT_X24_8_32 = 0x16, + GNM_IMG_DATA_FORMAT_GB_GR = 0x20, + GNM_IMG_DATA_FORMAT_BG_RG = 0x21, + GNM_IMG_DATA_FORMAT_5_9_9_9 = 0x22, + GNM_IMG_DATA_FORMAT_BC1 = 0x23, + GNM_IMG_DATA_FORMAT_BC2 = 0x24, + GNM_IMG_DATA_FORMAT_BC3 = 0x25, + GNM_IMG_DATA_FORMAT_BC4 = 0x26, + GNM_IMG_DATA_FORMAT_BC5 = 0x27, + GNM_IMG_DATA_FORMAT_BC6 = 0x28, + GNM_IMG_DATA_FORMAT_BC7 = 0x29, + GNM_IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c, + GNM_IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d, + GNM_IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e, + GNM_IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f, + GNM_IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30, + GNM_IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31, + GNM_IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32, + GNM_IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33, + GNM_IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34, + GNM_IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35, + GNM_IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36, + GNM_IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37, + GNM_IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38, + GNM_IMG_DATA_FORMAT_4_4 = 0x39, + GNM_IMG_DATA_FORMAT_6_5_5 = 0x3a, + GNM_IMG_DATA_FORMAT_1 = 0x3b, + GNM_IMG_DATA_FORMAT_1_REVERSED = 0x3c, + GNM_IMG_DATA_FORMAT_32_AS_8 = 0x3d, + GNM_IMG_DATA_FORMAT_32_AS_8_8 = 0x3e, + GNM_IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f, +}; + +enum GnmImgNumFormat { + GNM_IMG_NUM_FORMAT_UNORM = 0x0, + GNM_IMG_NUM_FORMAT_SNORM = 0x1, + GNM_IMG_NUM_FORMAT_USCALED = 0x2, + GNM_IMG_NUM_FORMAT_SSCALED = 0x3, + GNM_IMG_NUM_FORMAT_UINT = 0x4, + GNM_IMG_NUM_FORMAT_SINT = 0x5, + GNM_IMG_NUM_FORMAT_SNORM_OGL = 0x6, + GNM_IMG_NUM_FORMAT_FLOAT = 0x7, + 
GNM_IMG_NUM_FORMAT_SRGB = 0x9, + GNM_IMG_NUM_FORMAT_UBNORM = 0xa, + GNM_IMG_NUM_FORMAT_UBNORM_OGL = 0xb, + GNM_IMG_NUM_FORMAT_UBINT = 0xc, + GNM_IMG_NUM_FORMAT_UBSCALED = 0xd, +}; + +enum GnmZFormat { + GNM_Z_INVALID = 0x0, + GNM_Z_16 = 0x1, + GNM_Z_24 = 0x2, + GNM_Z_32_FLOAT = 0x3, +}; + +enum GnmStencilFormat { + GNM_STENCIL_INVALID = 0x0, + GNM_STENCIL_8 = 0x1, +}; + +enum GnmChannel { + GNM_CHAN_CONSTANT0 = 0x0, + GNM_CHAN_CONSTANT1 = 0x1, + GNM_CHAN_X = 0x4, + GNM_CHAN_Y = 0x5, + GNM_CHAN_Z = 0x6, + GNM_CHAN_W = 0x7, +}; + +enum GnmSurfaceSwap { + GNM_SWAP_STD = 0x0, + GNM_SWAP_ALT = 0x1, + GNM_SWAP_STD_REV = 0x2, + GNM_SWAP_ALT_REV = 0x3, +}; + +union GnmDataFormat { + struct { + GnmImageFormat surfacefmt : 8; + GnmImgNumFormat chantype : 4; + GnmChannel chanx : 3; + GnmChannel chany : 3; + GnmChannel chanz : 3; + GnmChannel chanw : 3; + uint32_t _unused : 8; + }; + uint32_t asuint; +}; +static_assert(sizeof(GnmDataFormat) == 0x4, ""); + +GnmDataFormat gnmDfInitFromFmask(uint32_t numsamples, uint32_t numfrags); +GnmDataFormat gnmDfInitFromZ(GnmZFormat zfmt); + +static inline GnmDataFormat gnmDfInitFromStencil(GnmStencilFormat stencilfmt, + GnmImgNumFormat chantype) { + GnmDataFormat res = { + .surfacefmt = + stencilfmt == GNM_STENCIL_8 ? 
GNM_IMG_DATA_FORMAT_8 : GNM_IMG_DATA_FORMAT_INVALID, + .chantype = chantype, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_X, + .chanz = GNM_CHAN_X, + .chanw = GNM_CHAN_X, + }; + return res; +} + +static inline uint32_t gnmDfGetTexelsPerElement(const GnmDataFormat datafmt) { + switch (datafmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_BC1: + case GNM_IMG_DATA_FORMAT_BC2: + case GNM_IMG_DATA_FORMAT_BC3: + case GNM_IMG_DATA_FORMAT_BC4: + case GNM_IMG_DATA_FORMAT_BC5: + case GNM_IMG_DATA_FORMAT_BC6: + case GNM_IMG_DATA_FORMAT_BC7: + return 16; + case GNM_IMG_DATA_FORMAT_1: + case GNM_IMG_DATA_FORMAT_1_REVERSED: + return 8; + default: + return 1; + } +} + +uint32_t gnmDfGetNumComponents(const GnmDataFormat datafmt); +uint32_t gnmDfGetBitsPerElement(const GnmDataFormat datafmt); +static inline uint32_t gnmDfGetTotalBitsPerElement(const GnmDataFormat fmt) { + const uint32_t bitsperelem = gnmDfGetBitsPerElement(fmt); + const uint32_t texelsperelem = gnmDfGetTexelsPerElement(fmt); + return bitsperelem * texelsperelem; +} +static inline uint32_t gnmDfGetBytesPerElement(const GnmDataFormat datafmt) { + return gnmDfGetBitsPerElement(datafmt) / 8; +} +static inline uint32_t gnmDfGetTotalBytesPerElement(const GnmDataFormat fmt) { + return gnmDfGetTotalBitsPerElement(fmt) / 8; +} +static inline bool gnmDfIsBlockCompressed(const GnmDataFormat datafmt) { + switch (datafmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_BC1: + case GNM_IMG_DATA_FORMAT_BC2: + case GNM_IMG_DATA_FORMAT_BC3: + case GNM_IMG_DATA_FORMAT_BC4: + case GNM_IMG_DATA_FORMAT_BC5: + case GNM_IMG_DATA_FORMAT_BC6: + case GNM_IMG_DATA_FORMAT_BC7: + return true; + default: + return false; + } +} + +bool gnmDfGetRtChannelType(const GnmDataFormat datafmt, GnmSurfaceNumber* out); +bool gnmDfGetRtChannelOrder(const GnmDataFormat datafmt, GnmSurfaceSwap* out); + +GnmZFormat gnmDfGetZFormat(const GnmDataFormat datafmt); +GnmStencilFormat gnmDfGetStencilFormat(const GnmDataFormat datafmt); + +static inline uint32_t 
gnmDfGetTexelsPerElementWide(const GnmDataFormat fmt) { + switch (fmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_BC1: + case GNM_IMG_DATA_FORMAT_BC2: + case GNM_IMG_DATA_FORMAT_BC3: + case GNM_IMG_DATA_FORMAT_BC4: + case GNM_IMG_DATA_FORMAT_BC5: + case GNM_IMG_DATA_FORMAT_BC6: + case GNM_IMG_DATA_FORMAT_BC7: + return 4; + case GNM_IMG_DATA_FORMAT_1: + case GNM_IMG_DATA_FORMAT_1_REVERSED: + return 8; + case GNM_IMG_DATA_FORMAT_GB_GR: + case GNM_IMG_DATA_FORMAT_BG_RG: + return 2; + default: + return 1; + } +} +static inline uint32_t gnmDfGetTexelsPerElementTall(const GnmDataFormat fmt) { + switch (fmt.surfacefmt) { + case GNM_IMG_DATA_FORMAT_BC1: + case GNM_IMG_DATA_FORMAT_BC2: + case GNM_IMG_DATA_FORMAT_BC3: + case GNM_IMG_DATA_FORMAT_BC4: + case GNM_IMG_DATA_FORMAT_BC5: + case GNM_IMG_DATA_FORMAT_BC6: + case GNM_IMG_DATA_FORMAT_BC7: + return 4; + default: + return 1; + } +} + +static const GnmDataFormat GNM_FMT_INVALID = { + .surfacefmt = GNM_IMG_DATA_FORMAT_INVALID, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_CONSTANT0, + .chany = GNM_CHAN_CONSTANT0, + .chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_CONSTANT0, +}; +static const GnmDataFormat GNM_FMT_R8_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_8, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_CONSTANT0, + .chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_CONSTANT1, +}; +static const GnmDataFormat GNM_FMT_A8_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_8, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_CONSTANT0, + .chany = GNM_CHAN_CONSTANT0, + .chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_X, +}; +static const GnmDataFormat GNM_FMT_R8G8B8A8_SRGB = { + .surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8, + .chantype = GNM_IMG_NUM_FORMAT_SRGB, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_R8G8B8A8_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8, + .chantype = 
GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_R8G8B8A8_UINT = { + .surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8, + .chantype = GNM_IMG_NUM_FORMAT_UINT, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_B8G8R8A8_SRGB = { + .surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8, + .chantype = GNM_IMG_NUM_FORMAT_SRGB, + .chanx = GNM_CHAN_Z, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_X, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_B8G8R8A8_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_Z, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_X, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_R16_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_16, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_CONSTANT0, + .chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_CONSTANT1, +}; +static const GnmDataFormat GNM_FMT_R16G16B16A16_SRGB = { + .surfacefmt = GNM_IMG_DATA_FORMAT_16_16_16_16, + .chantype = GNM_IMG_NUM_FORMAT_SRGB, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_R16G16B16A16_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_16_16_16_16, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_R32_FLOAT = { + .surfacefmt = GNM_IMG_DATA_FORMAT_32, + .chantype = GNM_IMG_NUM_FORMAT_FLOAT, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_CONSTANT0, + .chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_CONSTANT1, +}; +static const GnmDataFormat GNM_FMT_R32G32_FLOAT = { + .surfacefmt = GNM_IMG_DATA_FORMAT_32_32, + .chantype = GNM_IMG_NUM_FORMAT_FLOAT, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + 
.chanz = GNM_CHAN_CONSTANT0, + .chanw = GNM_CHAN_CONSTANT1, +}; +static const GnmDataFormat GNM_FMT_R32G32B32_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_CONSTANT0, +}; +static const GnmDataFormat GNM_FMT_R32G32B32_FLOAT = { + .surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32, + .chantype = GNM_IMG_NUM_FORMAT_FLOAT, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_CONSTANT1, +}; +static const GnmDataFormat GNM_FMT_R32G32B32A32_SRGB = { + .surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32, + .chantype = GNM_IMG_NUM_FORMAT_SRGB, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_R32G32B32A32_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_R32G32B32A32_FLOAT = { + .surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32, + .chantype = GNM_IMG_NUM_FORMAT_FLOAT, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const GnmDataFormat GNM_FMT_BC6_SNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_BC6, + .chantype = GNM_IMG_NUM_FORMAT_SNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_CONSTANT1, +}; +static const GnmDataFormat GNM_FMT_BC6_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_BC6, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_CONSTANT1, +}; +static const GnmDataFormat GNM_FMT_BC7_UNORM = { + .surfacefmt = GNM_IMG_DATA_FORMAT_BC7, + .chantype = GNM_IMG_NUM_FORMAT_UNORM, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; +static const 
GnmDataFormat GNM_FMT_BC7_SRGB = { + .surfacefmt = GNM_IMG_DATA_FORMAT_BC7, + .chantype = GNM_IMG_NUM_FORMAT_SRGB, + .chanx = GNM_CHAN_X, + .chany = GNM_CHAN_Y, + .chanz = GNM_CHAN_Z, + .chanw = GNM_CHAN_W, +}; diff --git a/src/video_core/amdgpu/gpuaddr/element.cpp b/src/video_core/amdgpu/gpuaddr/element.cpp new file mode 100644 index 00000000..b82bebac --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/element.cpp @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h" + +uint64_t gpaComputeSurfaceAddrFromCoordLinear( + uint32_t x, ///< [in] x coord + uint32_t y, ///< [in] y coord + uint32_t slice, ///< [in] slice/depth index + uint32_t sample, ///< [in] sample index + uint32_t bpp, ///< [in] bits per pixel + uint32_t pitch, ///< [in] pitch + uint32_t height, ///< [in] height + uint32_t numSlices, ///< [in] number of slices + uint32_t* pBitPosition ///< [out] bit position inside a byte +) { + const uint64_t sliceSize = (uint64_t)pitch * height; + + uint64_t sliceOffset = (slice + sample * numSlices) * sliceSize; + uint64_t rowOffset = (uint64_t)y * pitch; + uint64_t pixOffset = x; + + uint64_t addr = (sliceOffset + rowOffset + pixOffset) * bpp; + + if (pBitPosition) { + *pBitPosition = (uint32_t)(addr % 8); + } + addr /= 8; + + return addr; +} + +GpaError gpaCalcSurfaceSizeOffset(uint64_t* outsize, uint64_t* outoffset, const GpaTextureInfo* tex, + uint32_t miplevel, uint32_t arrayslice) { + if (!tex) { + return GPA_ERR_INVALID_ARGS; + } + + const uint32_t numarrayslices = tex->numslices; + const uint32_t basewidth = tex->width; + const uint32_t baseheight = tex->height; + const uint32_t basedepth = tex->depth; + const uint32_t basepitch = tex->pitch; + + GpaTilingParams tp = {}; + GpaError res = gpaTpInit(&tp, tex, 0, arrayslice); + if (res != GPA_ERR_OK) { + return res; + } + + GpaSurfaceInfo si = {0}; + + uint32_t finaloffset = 0; + uint32_t finalsize = 
0; + + for (uint32_t m = 0; m <= miplevel; m += 1) { + finaloffset += numarrayslices * finalsize; + + tp.linearwidth = std::max(basewidth >> m, 1U); + tp.linearheight = std::max(baseheight >> m, 1U); + tp.lineardepth = basedepth; + tp.basetiledpitch = basepitch; + tp.miplevel = m; + + res = gpaComputeSurfaceInfo(&si, &tp); + if (res != GPA_ERR_OK) { + return res; + } + + finalsize = si.surfacesize; + } + + finaloffset += si.surfacesize * arrayslice; + + if (outsize) { + *outsize = finalsize; + } + if (outoffset) { + *outoffset = finaloffset; + } + return GPA_ERR_OK; +} diff --git a/src/video_core/amdgpu/gpuaddr/error.cpp b/src/video_core/amdgpu/gpuaddr/error.cpp new file mode 100644 index 00000000..ec0c8395 --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/error.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#include "video_core/amdgpu/gpuaddr/error.h" + +std::string_view gpaStrError(const GpaError err) { + switch (err) { + case GPA_ERR_OK: + return "No error"; + case GPA_ERR_INVALID_ARGS: + return "An invalid argument was used"; + case GPA_ERR_OVERFLOW: + return "A buffer has overflown"; + case GPA_ERR_TILING_ERROR: + return "An internal tiling error occured"; + case GPA_ERR_UNSUPPORTED: + return "A requested feature is unsupported"; + case GPA_ERR_INTERNAL_ERROR: + return "An internal error occured"; + case GPA_ERR_NOT_COMPRESSED: + return "The texture is not compressed"; + default: + return ""; + } +} diff --git a/src/video_core/amdgpu/gpuaddr/error.h b/src/video_core/amdgpu/gpuaddr/error.h new file mode 100644 index 00000000..b5830b0c --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/error.h @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#pragma once + +#include <string_view> + +enum GpaError { + GPA_ERR_OK = 0, + GPA_ERR_INVALID_ARGS, + GPA_ERR_OVERFLOW, + GPA_ERR_TILING_ERROR, + GPA_ERR_UNSUPPORTED, + GPA_ERR_INTERNAL_ERROR, +
GPA_ERR_NOT_COMPRESSED, +}; + +std::string_view gpaStrError(const GpaError err); diff --git a/src/video_core/amdgpu/gpuaddr/gpuaddr.h b/src/video_core/amdgpu/gpuaddr/gpuaddr.h new file mode 100644 index 00000000..3e90273d --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/gpuaddr.h @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#pragma once + +#include "video_core/amdgpu/gpuaddr/error.h" +#include "video_core/amdgpu/gpuaddr/types.h" + +// +// Surface +// +GpaError gpaComputeSurfaceInfo(GpaSurfaceInfo* out, const GpaTilingParams* tp); +GpaError gpaComputeHtileInfo(GpaHtileInfo* outinfo, const GpaHtileParams* params); +GpaError gpaComputeCmaskInfo(GpaCmaskInfo* outinfo, const GpaCmaskParams* params); +GpaError gpaComputeFmaskInfo(GpaFmaskInfo* outinfo, const GpaFmaskParams* params); +GpaError gpaComputeSurfaceTileMode(GnmTileMode* outtilemode, GnmGpuMode mingpumode, + GnmArrayMode arraymode, GpaSurfaceFlags flags, + GnmDataFormat surfacefmt, u32 numfragsperpixel, + GnmMicroTileMode mtm); + +// +// Surface generation +// +GpaError gpaFindOptimalSurface(GpaSurfaceProperties* outprops, GpaSurfaceType surfacetype, u32 bpp, + u32 numfrags, bool mipmapped, GnmGpuMode mingpumode); + +// +// Element/Utility +// +uint64_t gpaComputeSurfaceAddrFromCoordLinear(u32 x, u32 y, u32 slice, u32 sample, u32 bpp, + u32 pitch, u32 height, u32 numSlices, + u32* pBitPosition); +GpaError gpaCalcSurfaceSizeOffset(uint64_t* outsize, uint64_t* outoffset, const GpaTextureInfo* tex, + u32 miplevel, u32 arrayslice); + +GpaError gpaGetTileInfo(GpaTileInfo* outinfo, GnmTileMode tilemode, u32 bpp, u32 numfrags, + GnmGpuMode gpumode); +GpaError gpaComputeBaseSwizzle(u32* outswizzle, GnmTileMode tilemode, u32 surfindex, u32 bpp, + u32 numfrags, GnmGpuMode gpumode); + +// +// Decompression +// +GpaError gpaGetDecompressedSize(uint64_t* outsize, const void* inbuffer, size_t inbuffersize, + const GpaTextureInfo* texinfo); +GpaError 
gpaDecompressTexture(void* outbuffer, uint64_t outbuffersize, const void* inbuffer, + uint64_t inbuffersize, const GpaTextureInfo* texinfo, + GnmDataFormat* outfmt); + +// +// Tiler +// +GpaError gpaTpInit(GpaTilingParams* tp, const GpaTextureInfo* tex, u32 miplevel, u32 arrayslice); + +GpaError gpaTileSurface(void* outtile, size_t outtilesize, const void* inuntile, + size_t inuntilesize, const GpaTilingParams* tp); +GpaError gpaTileSurfaceRegion(void* outtile, size_t outtilesize, const void* inuntile, + size_t inuntilesize, const GpaTilingParams* tp, + const GpaSurfaceRegion* region, u32 srcpitch, u32 srcslicepitch); +GpaError gpaTileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo, u32 mip, + u32 slice); +GpaError gpaTileTextureAll(const void* inbuffer, size_t inbuffersize, void* outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo); +GpaError gpaDetileSurface(void* outuntile, size_t outuntilesize, const void* intile, + size_t intilesize, const GpaTilingParams* tp); +GpaError gpaDetileSurfaceRegion(void* outuntile, size_t outuntilesize, const void* intile, + size_t intilesize, const GpaTilingParams* tp, + const GpaSurfaceRegion* region, u32 dstpitch, u32 dstslicepitch); +GpaError gpaDetileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo, u32 mip, + u32 slice); +GpaError gpaDetileTextureAll(const void* inbuffer, size_t inbuffersize, void* outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo); diff --git a/src/video_core/amdgpu/gpuaddr/gpuaddr_private.h b/src/video_core/amdgpu/gpuaddr/gpuaddr_private.h new file mode 100644 index 00000000..c0b8dae6 --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/gpuaddr_private.h @@ -0,0 +1,145 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#pragma once + +#include <algorithm> +#include
"video_core/amdgpu/gpuaddr/gpuaddr.h" + +constexpr u32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling +constexpr u32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling +constexpr u32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes +constexpr u32 XThickTileThickness = 8; ///< Extra thick tiling thickness +constexpr u32 PowerSaveTileBytes = 64; ///< Number of bytes per tile for power save 64 +constexpr u32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache +constexpr u32 CmaskElemBits = 4; ///< Number of bits for CMASK element +constexpr u32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32 + +constexpr u32 MicroTilePixels = MicroTileWidth * MicroTileHeight; + +constexpr u32 Block64K = 0x10000; +constexpr u32 PrtTileSize = Block64K; + +constexpr u32 PIPE_INTERLEAVE_BYTES = 256; +constexpr u32 BANK_INTERLEAVE = 1; + +constexpr u32 BLOCK_SIZE = 4; +constexpr u32 MICROTILE_SIZE = 8; +constexpr u32 TILE_SIZE = 8; +constexpr u32 DRAM_ROW_SIZE = 1024; + +static inline uint32_t QLog2(uint32_t x) { + uint32_t y = 0; + + switch (x) { + case 1: + y = 0; + break; + case 2: + y = 1; + break; + case 4: + y = 2; + break; + case 8: + y = 3; + break; + case 16: + y = 4; + break; + } + + return y; +} + +static inline bool IsPow2(const uint32_t x) { + return (x > 0) && ((x & (x - 1)) == 0); +} +static inline uint32_t NextPow2(uint32_t x) { + x = x - 1; + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + return x + 1; +} +static inline uint32_t PowTwoAlign32(uint32_t x, uint32_t align) { + return (x + (align - 1)) & (~(align - 1)); +} + +static inline uint32_t BitsToBytes32(uint32_t x) { + return (x + (8 - 1)) / 8; +} +static inline uint64_t BitsToBytes64(uint64_t x) { + return (x + (8 - 1)) / 8; +} +static inline uint32_t BytesToBits32(uint32_t x) { + return x * 8; +} +static inline uint64_t BytesToBits64(uint64_t x) { + return x * 8; +} + +GnmArrayMode
gpaGetArrayMode(GnmTileMode tilemode); +GnmMicroTileMode gpaGetMicroTileMode(GnmTileMode tilemode); +GnmPipeConfig gpaGetPipeConfig(GnmTileMode tilemode); +GnmPipeConfig gpaGetAltPipeConfig(GnmTileMode tilemode); +GnmSampleSplit gpaGetSampleSplit(GnmTileMode tilemode); +GnmTileSplit gpaGetTileSplit(GnmTileMode tilemode); + +GpaError gpaCalcSurfaceMacrotileMode(GnmMacroTileMode* outmtm, GnmTileMode tilemode, + uint32_t bitsperelem, uint32_t numfragsperpixel); + +GpaError gpaAdjustTileMode(GnmTileMode* outtilemode, GnmTileMode oldtilemode, + GnmArrayMode newarraymode); + +uint32_t gpaGetMicroTileThickness(GnmArrayMode arraymode); +bool gpaIsLinear(GnmArrayMode arraymode); +bool gpaIsMicroTiled(GnmArrayMode arraymode); +bool gpaIsMacroTiled(GnmArrayMode arraymode); +bool gpaIsPrt(GnmArrayMode arraymode); + +GnmBankWidth gpaGetBankWidth(GnmMacroTileMode mtm); +// +// BASE mode macrotilemode stuff +// +GnmBankHeight gpaGetBankHeight(GnmMacroTileMode mtm); +GnmNumBanks gpaGetNumBanks(GnmMacroTileMode mtm); +GnmMacroTileAspect gpaGetMacrotileAspect(GnmMacroTileMode mtm); +// +// NEO mode macrotilemode stuff +// +GnmBankHeight gpaGetAltBankHeight(GnmMacroTileMode mtm); +GnmNumBanks gpaGetAltNumBanks(GnmMacroTileMode mtm); +GnmMacroTileAspect gpaGetAltMacrotileAspect(GnmMacroTileMode mtm); + +uint32_t gpaGetPipeCount(GnmPipeConfig pipecfg); + +static inline uint32_t getblockpitch(const GnmDataFormat fmt) { + const uint32_t bytesperelem = gnmDfGetTotalBytesPerElement(fmt); + const uint32_t texelsperelemwide = gnmDfGetTexelsPerElementWide(fmt); + return BLOCK_SIZE * bytesperelem / texelsperelemwide; +} +static inline uint32_t gettilepitch(const GnmDataFormat fmt) { + const uint32_t bytesperelem = gnmDfGetTotalBytesPerElement(fmt); + const uint32_t texelsperelemwide = gnmDfGetTexelsPerElementWide(fmt); + return TILE_SIZE * bytesperelem / texelsperelemwide; +} + +static inline uint32_t getelemsperblockwide(const GnmDataFormat fmt) { + const uint32_t elemwidth = 
gnmDfGetTexelsPerElementWide(fmt); + return BLOCK_SIZE / elemwidth; +} +static inline uint32_t getelemsperblocktall(const GnmDataFormat fmt) { + const uint32_t elemheight = gnmDfGetTexelsPerElementTall(fmt); + return BLOCK_SIZE / elemheight; +} + +static inline uint32_t GetTileSplitBytes(GnmTileSplit split, uint32_t bpp, uint32_t thickness) { + uint32_t tileBytes1x = BitsToBytes32(bpp * MicroTilePixels * thickness); + + // Non-depth entries store a split factor + uint32_t sampleSplit = 64 << split; + return std::max(256u, sampleSplit * tileBytes1x); +} diff --git a/src/video_core/amdgpu/gpuaddr/surface.cpp b/src/video_core/amdgpu/gpuaddr/surface.cpp new file mode 100644 index 00000000..70a6d695 --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/surface.cpp @@ -0,0 +1,1289 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h" + +static inline void ComputeMipLevel(uint32_t* outwidth, uint32_t* outheight, + const GpaTilingParams* params) { + uint32_t width = params->linearwidth; + uint32_t height = params->linearheight; + + if (params->isblockcompressed) { + if (params->miplevel == 0) { + // DXTn's level 0 must be multiple of 4 + // But there are exceptions: + // 1. Internal surface creation in hostblt/vsblt/etc... + // 2. 
Runtime doesn't reject ATI1/ATI2 whose + // width/height are not multiple of 4 + width = PowTwoAlign32(width, 4); + height = PowTwoAlign32(height, 4); + } + } + + // basePitch is calculated from level 0 so we only check this + // for mipLevel > 0 + if (params->miplevel > 0 && params->basetiledpitch != 0) { + width = std::max(1U, params->basetiledpitch >> params->miplevel); + } + + // pow2Pad is done in PostComputeMipLevel + + *outwidth = width; + *outheight = height; +} + +static inline void AdjustPitchAlignment(GpaSurfaceFlags flags, ///< [in] Surface flags + uint32_t* pPitchAlign ///< [out] Pointer to pitch alignment +) { + // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO + // which means 32 pixel alignment Maybe it will be fixed in + // future but let's make it general for now. + if (flags.display || flags.overlay) { + *pPitchAlign = PowTwoAlign32(*pPitchAlign, 32); + + if (flags.display) { + *pPitchAlign = std::max(1U, *pPitchAlign); + /**pPitchAlign = + umax(m_minPitchAlignPixels, *pPitchAlign);*/ + } + } +} + +static void ComputeSurfaceAlignmentsLinear( + GnmArrayMode arrayMode, ///< [in] tile mode + uint32_t bpp, ///< [in] bits per pixel + GpaSurfaceFlags flags, ///< [in] surface flags + uint32_t* pBaseAlign, ///< [out] base address alignment in bytes + uint32_t* pPitchAlign, ///< [out] pitch alignment in pixels + uint32_t* pHeightAlign ///< [out] height alignment in pixels +) { + switch (arrayMode) { + case GNM_ARRAY_LINEAR_GENERAL: + // + // The required base alignment and pitch and height + // granularities is to 1 element. + // + *pBaseAlign = (bpp > 8) ? bpp / 8 : 1; + *pPitchAlign = 1; + *pHeightAlign = 1; + break; + case GNM_ARRAY_LINEAR_ALIGNED: + // + // The required alignment for base is the pipe + // interleave size. The required granularity for pitch + // is hwl dependent. The required granularity for height + // is one row. 
+ // + *pBaseAlign = PIPE_INTERLEAVE_BYTES; + *pPitchAlign = std::max(8U, 64 / BitsToBytes32(bpp)); + *pHeightAlign = 1; + break; + default: + *pBaseAlign = 1; + *pPitchAlign = 1; + *pHeightAlign = 1; + break; + } + + AdjustPitchAlignment(flags, pPitchAlign); +} + +static void ComputeSurfaceAlignmentsMicroTiled( + GpaSurfaceFlags flags, ///< [in] surface flags + uint32_t* pBaseAlign, ///< [out] base address alignment in bytes + uint32_t* pPitchAlign, ///< [out] pitch alignment in pixels + uint32_t* pHeightAlign ///< [out] height alignment in pixels +) { + // + // The required alignment for base is the pipe interleave size. + // + *pBaseAlign = PIPE_INTERLEAVE_BYTES; + *pPitchAlign = MicroTileWidth; + *pHeightAlign = MicroTileHeight; + + AdjustPitchAlignment(flags, pPitchAlign); +} + +static GpaError HwlReduceBankWidthHeight(uint32_t tileSize, ///< [in] tile size + uint32_t bpp, ///< [in] bits per pixel + GpaSurfaceFlags flags, ///< [in] surface flags + uint32_t numSamples, ///< [in] number of samples + uint32_t bankHeightAlign, ///< [in] bank height alignment + uint32_t pipes, ///< [in] pipes + GpaTileInfo* pTileInfo ///< [in,out] bank structure. 
+) { + uint32_t macroAspectAlign; + bool valid = true; + + uint32_t bankWidth = (1 << pTileInfo->bankwidth); + uint32_t bankHeight = (1 << pTileInfo->bankheight); + uint32_t macroAspectRatio = (1 << pTileInfo->macroaspectratio); + + if (tileSize * bankWidth * bankHeight > DRAM_ROW_SIZE) { + bool stillGreater = true; + + // Try reducing bankWidth first + if (stillGreater && bankWidth > 1) { + while (stillGreater && bankWidth > 0) { + bankWidth >>= 1; + + if (bankWidth == 0) { + bankWidth = 1; + break; + } + + stillGreater = tileSize * bankWidth * bankHeight > DRAM_ROW_SIZE; + } + + // bankWidth is reduced above, so we need to + // recalculate bankHeight and ratio + bankHeightAlign = + std::max(1u, PIPE_INTERLEAVE_BYTES * BANK_INTERLEAVE / (tileSize * bankWidth)); + + // We cannot increase bankHeight so just assert + // this case. + if ((bankHeight % bankHeightAlign) != 0) { + return GPA_ERR_INTERNAL_ERROR; + } + + if (numSamples == 1) { + macroAspectAlign = std::max(1u, PIPE_INTERLEAVE_BYTES * BANK_INTERLEAVE / + (tileSize * pipes * bankWidth)); + macroAspectRatio = PowTwoAlign32(macroAspectRatio, macroAspectAlign); + } + } + + // Early quit bank_height degradation for "64" bit z + // buffer + if (flags.depthtarget && bpp >= 64) { + stillGreater = false; + } + + // Then try reducing bankHeight + if (stillGreater && bankHeight > bankHeightAlign) { + while (stillGreater && bankHeight > bankHeightAlign) { + bankHeight >>= 1; + + if (bankHeight < bankHeightAlign) { + bankHeight = bankHeightAlign; + break; + } + + stillGreater = tileSize * bankWidth * bankHeight > DRAM_ROW_SIZE; + } + } + + valid = !stillGreater; + } + + return valid ? 
GPA_ERR_OK : GPA_ERR_UNSUPPORTED; +} + +static GpaError ComputeSurfaceAlignmentsMacroTiled(GnmArrayMode tileMode, ///< [in] tile mode + uint32_t bpp, ///< [in] bits per pixel + GpaSurfaceFlags flags, ///< [in] surface flags + uint32_t mipLevel, ///< [in] mip level + uint32_t numSamples, ///< [in] number of samples + GpaSurfaceInfo* pOut ///< [in,out] Surface output +) { + uint32_t macroTileWidth; + uint32_t macroTileHeight; + + uint32_t tileSize; + uint32_t bankHeightAlign; + uint32_t macroAspectAlign; + + uint32_t thickness = gpaGetMicroTileThickness(tileMode); + uint32_t pipes = gpaGetPipeCount(pOut->tileinfo.pipeconfig); + + // + // Align bank height first according to latest h/w spec + // + + const uint32_t banks = 2 << pOut->tileinfo.banks; + const uint32_t bankWidth = (1 << pOut->tileinfo.bankwidth); + uint32_t bankHeight = (1 << pOut->tileinfo.bankheight); + uint32_t macroAspectRatio = (1 << pOut->tileinfo.macroaspectratio); + const uint32_t tileSplitBytes = GetTileSplitBytes(pOut->tileinfo.tilesplit, bpp, thickness); + + // tile_size = MIN(tile_split, 64 * tile_thickness * + // element_bytes * num_samples) + tileSize = std::min(tileSplitBytes, BitsToBytes32(64 * thickness * bpp * numSamples)); + + // bank_height_align = + // MAX(1, (pipe_interleave_bytes * + // bank_interleave)/(tile_size*bank_width)) + bankHeightAlign = + std::max(1u, PIPE_INTERLEAVE_BYTES * BANK_INTERLEAVE / (tileSize * bankWidth)); + + bankHeight = PowTwoAlign32(bankHeight, bankHeightAlign); + + // num_pipes * bank_width * macro_tile_aspect >= + // (pipe_interleave_size * bank_interleave) / tile_size + if (numSamples == 1) { + // this restriction is only for mipmap (mipmap's + // numSamples must be 1) + macroAspectAlign = + std::max(1u, PIPE_INTERLEAVE_BYTES * BANK_INTERLEAVE / (tileSize * pipes * bankWidth)); + macroAspectRatio = PowTwoAlign32(macroAspectRatio, macroAspectAlign); + } + + // Sony's library ignores any failure, so do the same + GpaError err = 
HwlReduceBankWidthHeight(tileSize, bpp, flags, numSamples, bankHeightAlign, + pipes, &pOut->tileinfo); + if (err != GPA_ERR_OK) { + return err; + } + + // + // The required granularity for pitch is the macro tile + // width. + // + macroTileWidth = MicroTileWidth * bankWidth * pipes * macroAspectRatio; + + pOut->pitchalign = macroTileWidth; + pOut->blockwidth = macroTileWidth; + + AdjustPitchAlignment(flags, &pOut->pitchalign); + + // + // The required granularity for height is the macro tile + // height. + // + macroTileHeight = MicroTileHeight * bankHeight * banks / macroAspectRatio; + + pOut->heightalign = macroTileHeight; + pOut->blockheight = macroTileHeight; + + // + // Compute base alignment + // + pOut->basealign = pipes * bankWidth * banks * bankHeight * tileSize; + + if ((mipLevel == 0) && (flags.prt)) { + uint32_t macroTileSize = pOut->blockwidth * pOut->blockheight * numSamples * bpp / 8; + + if (macroTileSize < PrtTileSize) { + uint32_t numMacroTiles = PrtTileSize / macroTileSize; + + if ((PrtTileSize % macroTileSize) != 0) { + return GPA_ERR_INTERNAL_ERROR; + } + + pOut->pitchalign *= numMacroTiles; + pOut->basealign *= numMacroTiles; + } + } + + return GPA_ERR_OK; +} + +static GpaError PadDimensions(GnmArrayMode arrayMode, ///< [in] tile mode + uint32_t bpp, ///< [in] bits per pixel + GpaSurfaceFlags flags, ///< [in] surface flags + uint32_t numSamples, ///< [in] number of samples + const GpaTileInfo* pTileInfo, ///< [in] bank structure. 
+ uint32_t padDims, ///< [in] Dimensions to pad valid value 1,2,3 + uint32_t mipLevel, ///< [in] MipLevel + uint32_t* pPitch, ///< [in,out] pitch in pixels + uint32_t* pPitchAlign, ///< [in,out] pitch align could be changed in + ///< HwlPadDimensions + uint32_t* pHeight, ///< [in,out] height in pixels + uint32_t heightAlign, ///< [in] height alignment + uint32_t* pSlices, ///< [in,out] number of slices + uint32_t sliceAlign, ///< [in] number of slice alignment + GnmGpuMode mingpumode ///< [in] min GPU mode +) { + uint32_t pitchAlign = *pPitchAlign; + uint32_t thickness = gpaGetMicroTileThickness(arrayMode); + + if (padDims > 3) { + return GPA_ERR_INVALID_ARGS; + } + + // + // Override padding for mip levels + // + if (mipLevel > 0) { + if (flags.cube) { + // for cubemap, we only pad when client call + // with 6 faces as an identity + if (*pSlices > 1) { + padDims = 3; // we should pad cubemap + // sub levels when we + // treat it as 3d texture + } else { + padDims = 2; + } + } + } + + // Any possibilities that padDims is 0? + if (padDims == 0) { + padDims = 3; + } + + if (IsPow2(pitchAlign)) { + *pPitch = PowTwoAlign32((*pPitch), pitchAlign); + } else // add this code to pass unit test, r600 linear mode is + // not align bpp to pow2 for linear + { + *pPitch += pitchAlign - 1; + *pPitch /= pitchAlign; + *pPitch *= pitchAlign; + } + + if (padDims > 1) { + if (IsPow2(heightAlign)) { + *pHeight = PowTwoAlign32((*pHeight), heightAlign); + } else { + *pHeight += heightAlign - 1; + *pHeight /= heightAlign; + *pHeight *= heightAlign; + } + } + + if (padDims > 2 || thickness > 1) { + // for cubemap single face, we do not pad slices. + // if we pad it, the slice number should be set to 6 and + // current mip level > 1 + if (flags.cube && flags.cubeasarray) { + *pSlices = NextPow2(*pSlices); + } + + // normal 3D texture or arrays or cubemap has a thick + // mode? 
(Just pass unit test) + if (thickness > 1) { + *pSlices = PowTwoAlign32((*pSlices), sliceAlign); + } + } + + if (mingpumode == GNM_GPU_NEO && (numSamples > 1) && (mipLevel == 0) && + gpaIsMacroTiled(arrayMode)) { + const uint32_t tileSplitBytes = GetTileSplitBytes(pTileInfo->tilesplit, bpp, thickness); + uint32_t tileSizePerSample = BitsToBytes32(bpp * MicroTileWidth * MicroTileHeight); + uint32_t samplesPerSplit = tileSplitBytes / tileSizePerSample; + + if (samplesPerSplit < numSamples) { + uint32_t dccFastClearByteAlign = + gpaGetPipeCount(pTileInfo->pipeconfig) * PIPE_INTERLEAVE_BYTES * 256; + uint32_t bytesPerSplit = BitsToBytes32((*pPitch) * (*pHeight) * bpp * samplesPerSplit); + + if (!IsPow2(dccFastClearByteAlign)) { + return GPA_ERR_INTERNAL_ERROR; + } + + if (0 != (bytesPerSplit & (dccFastClearByteAlign - 1))) { + uint32_t dccFastClearPixelAlign = + dccFastClearByteAlign / BitsToBytes32(bpp) / samplesPerSplit; + uint32_t macroTilePixelAlign = (*pPitchAlign) * heightAlign; + + if ((dccFastClearPixelAlign >= macroTilePixelAlign) && + ((dccFastClearPixelAlign % macroTilePixelAlign) == 0)) { + uint32_t dccFastClearPitchAlignInMacroTile = + dccFastClearPixelAlign / macroTilePixelAlign; + uint32_t heightInMacroTile = (*pHeight) / heightAlign; + + while ((heightInMacroTile > 1) && ((heightInMacroTile % 2) == 0) && + (dccFastClearPitchAlignInMacroTile > 1) && + ((dccFastClearPitchAlignInMacroTile % 2) == 0)) { + heightInMacroTile >>= 1; + dccFastClearPitchAlignInMacroTile >>= 1; + } + + uint32_t dccFastClearPitchAlignInPixels = + (*pPitchAlign) * dccFastClearPitchAlignInMacroTile; + + if (IsPow2(dccFastClearPitchAlignInPixels)) { + *pPitch = PowTwoAlign32((*pPitch), dccFastClearPitchAlignInPixels); + } else { + *pPitch += (dccFastClearPitchAlignInPixels - 1); + *pPitch /= dccFastClearPitchAlignInPixels; + *pPitch *= dccFastClearPitchAlignInPixels; + } + + *pPitchAlign = dccFastClearPitchAlignInPixels; + } + } + } + } + + return GPA_ERR_OK; +} + +static uint64_t 
HwlGetSizeAdjustmentLinear( + GnmArrayMode arrayMode, ///< [in] tile mode + uint32_t bpp, ///< [in] bits per pixel + uint32_t numSamples, ///< [in] number of samples + uint32_t pitchAlign, ///< [in] pitch alignment + uint32_t* pPitch, ///< [in,out] pointer to pitch + uint32_t* pHeight, ///< [in,out] pointer to height + uint32_t* pHeightAlign ///< [in,out] pointer to height align +) { + uint64_t sliceSize; + if (arrayMode == GNM_ARRAY_LINEAR_GENERAL) { + sliceSize = BitsToBytes64((uint64_t)(*pPitch) * (*pHeight) * bpp * numSamples); + } else { + uint32_t pitch = *pPitch; + uint32_t height = *pHeight; + + uint32_t pixelsPerPipeInterleave = PIPE_INTERLEAVE_BYTES / BitsToBytes32(bpp); + uint32_t sliceAlignInPixel = pixelsPerPipeInterleave < 64 ? 64 : pixelsPerPipeInterleave; + + // numSamples should be 1 in real cases (no MSAA for + // linear but TGL may pass non 1 value) + uint64_t pixelPerSlice = (uint64_t)(pitch)*height * numSamples; + + while (pixelPerSlice % sliceAlignInPixel) { + pitch += pitchAlign; + pixelPerSlice = (uint64_t)(pitch)*height * numSamples; + } + + *pPitch = pitch; + + uint32_t heightAlign = 1; + + while ((pitch * heightAlign) % sliceAlignInPixel) { + heightAlign++; + } + + *pHeightAlign = heightAlign; + + sliceSize = BitsToBytes64(pixelPerSlice * bpp); + } + + return sliceSize; +} + +static uint64_t HwlGetSizeAdjustmentMicroTiled(uint32_t bpp, ///< [in] bits per pixel + uint32_t numSamples, ///< [in] number of samples + uint32_t* pPitch, ///< [in,out] pointer to pitch + uint32_t* pHeight ///< [in,out] pointer to height +) { + uint64_t logicalSliceSize; + // uint64_t physicalSliceSize; + + uint32_t pitch = *pPitch; + uint32_t height = *pHeight; + + // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA + // so actually numSamples == 1) + logicalSliceSize = BitsToBytes64((uint64_t)pitch * height * bpp * numSamples); + + // Physical slice: multiplied by thickness + // physicalSliceSize = logicalSliceSize * thickness; + + // + // R800 will 
always pad physical slice size to baseAlign which + // is pipe_interleave_bytes + // + // ADDR_ASSERT((physicalSliceSize % baseAlign) == 0); + + return logicalSliceSize; +} + +static GpaError ComputeSurfaceInfoLinear(const GpaTilingParams* params, ///< [in] Input structure + GpaSurfaceInfo* out, ///< [out] Output structure + uint32_t padDims ///< [in] Dimensions to pad +) { + uint32_t expPitch = params->linearwidth; + uint32_t expHeight = params->linearheight; + uint32_t expNumSlices = params->lineardepth; + + // No linear MSAA on real H/W, keep this for TGL + uint32_t numSamples = params->numfragsperpixel; + + const uint32_t microTileThickness = 1; + const GnmArrayMode arrayMode = gpaGetArrayMode(params->tilemode); + + // + // Compute the surface alignments. + // + ComputeSurfaceAlignmentsLinear(arrayMode, params->bitsperfrag, params->surfaceflags, + &out->basealign, &out->pitchalign, &out->heightalign); + + if (arrayMode == GNM_ARRAY_LINEAR_GENERAL && params->surfaceflags.colortarget && + (params->linearheight > 1)) { + // When linear_general surface is accessed in multiple + // lines, it requires 8 pixels in pitch alignment since + // PITCH_TILE_MAX is in unit of 8 pixels. It is OK if it + // is accessed per line. + if ((params->linearwidth % 8) != 0) { + return GPA_ERR_INTERNAL_ERROR; + } + } + + out->depthalign = microTileThickness; + + // + // Pad pitch and height to the required granularities. +
+ // + GpaError err = PadDimensions(arrayMode, params->bitsperfrag, params->surfaceflags, numSamples, + &out->tileinfo, padDims, params->miplevel, &expPitch, + &out->pitchalign, &expHeight, out->heightalign, &expNumSlices, + microTileThickness, params->mingpumode); + if (err != GPA_ERR_OK) { + return err; + } + + // + // Adjust per HWL + // + + uint64_t logicalSliceSize = + HwlGetSizeAdjustmentLinear(arrayMode, params->bitsperfrag, numSamples, out->pitchalign, + &expPitch, &expHeight, &out->heightalign); + + out->pitch = expPitch; + out->height = expHeight; + out->depth = expNumSlices; + + out->surfacesize = logicalSliceSize * expNumSlices; + + out->tilemode = params->tilemode; + + return GPA_ERR_OK; +} + +// TODO: make static +GnmArrayMode HwlDegradeThickTileMode( + GnmArrayMode baseTileMode, ///< [in] base tile mode + uint32_t numSlices, ///< [in] current number of slices + uint32_t* pBytesPerTile ///< [in,out] pointer to bytes per slice +) { + // if pBytesPerTile is NULL, this is a don't-care.... + uint32_t bytesPerTile = pBytesPerTile != NULL ? 
*pBytesPerTile : 64; + + GnmArrayMode expTileMode = baseTileMode; + switch (baseTileMode) { + case GNM_ARRAY_1D_TILED_THICK: + expTileMode = GNM_ARRAY_1D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case GNM_ARRAY_2D_TILED_THICK: + expTileMode = GNM_ARRAY_2D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case GNM_ARRAY_3D_TILED_THICK: + expTileMode = GNM_ARRAY_3D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case GNM_ARRAY_2D_TILED_XTHICK: + if (numSlices < ThickTileThickness) { + expTileMode = GNM_ARRAY_2D_TILED_THIN1; + bytesPerTile >>= 3; + } else { + expTileMode = GNM_ARRAY_2D_TILED_THICK; + bytesPerTile >>= 1; + } + break; + case GNM_ARRAY_3D_TILED_XTHICK: + if (numSlices < ThickTileThickness) { + expTileMode = GNM_ARRAY_3D_TILED_THIN1; + bytesPerTile >>= 3; + } else { + expTileMode = GNM_ARRAY_3D_TILED_THICK; + bytesPerTile >>= 1; + } + break; + default: + break; + } + + if (pBytesPerTile != NULL) { + *pBytesPerTile = bytesPerTile; + } + + return expTileMode; +} + +static GnmArrayMode ComputeSurfaceMipLevelTileMode( + GnmArrayMode baseTileMode, ///< [in] base tile mode + uint32_t bpp, ///< [in] bits per pixels + uint32_t pitch, ///< [in] current level pitch + uint32_t height, ///< [in] current level height + uint32_t numSlices, ///< [in] current number of slices + uint32_t numSamples, ///< [in] number of samples + uint32_t pitchAlign, ///< [in] pitch alignment + uint32_t heightAlign, ///< [in] height alignment + const GpaTileInfo* pTileInfo ///< [in] ptr to bank structure +) { + // uint64_t bytesPerSlice; + uint32_t bytesPerTile; + + GnmArrayMode expTileMode = baseTileMode; + uint32_t microTileThickness = gpaGetMicroTileThickness(expTileMode); + uint32_t interleaveSize = PIPE_INTERLEAVE_BYTES * BANK_INTERLEAVE; + + // + // Compute the size of a slice. 
+ // + /*bytesPerSlice = + BitsToBytes64((uint64_t)pitch * height * bpp * numSamples);*/ + bytesPerTile = BitsToBytes32(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples); + + // + // Reduce tiling mode from thick to thin if the number of slices + // is less than the micro tile thickness. + // + if (numSlices < microTileThickness) { + expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile); + } + + const uint32_t bankWidth = (1 << pTileInfo->bankwidth); + const uint32_t bankHeight = (1 << pTileInfo->bankheight); + const uint32_t macroAspectRatio = (1 << pTileInfo->macroaspectratio); + const uint32_t tileSplitBytes = + GetTileSplitBytes(pTileInfo->tilesplit, bpp, microTileThickness); + if (bytesPerTile > tileSplitBytes) { + bytesPerTile = tileSplitBytes; + } + + uint32_t threshold1 = + bytesPerTile * gpaGetPipeCount(pTileInfo->pipeconfig) * bankWidth * macroAspectRatio; + + uint32_t threshold2 = bytesPerTile * bankWidth * bankHeight; + + // + // Reduce the tile mode from 2D/3D to 1D in following conditions + // + switch (expTileMode) { + case GNM_ARRAY_2D_TILED_THIN1: // fall through + case GNM_ARRAY_3D_TILED_THIN1: + case GNM_ARRAY_PRT_TILED_THIN1: + case GNM_ARRAY_PRT_2D_TILED_THIN1: + case GNM_ARRAY_PRT_3D_TILED_THIN1: + if ((pitch < pitchAlign) || (height < heightAlign) || (interleaveSize > threshold1) || + (interleaveSize > threshold2)) { + expTileMode = GNM_ARRAY_1D_TILED_THIN1; + } + break; + case GNM_ARRAY_2D_TILED_THICK: // fall through + case GNM_ARRAY_3D_TILED_THICK: + case GNM_ARRAY_2D_TILED_XTHICK: + case GNM_ARRAY_3D_TILED_XTHICK: + case GNM_ARRAY_PRT_TILED_THICK: + case GNM_ARRAY_PRT_2D_TILED_THICK: + case GNM_ARRAY_PRT_3D_TILED_THICK: + if ((pitch < pitchAlign) || (height < heightAlign)) { + expTileMode = GNM_ARRAY_1D_TILED_THICK; + } + break; + default: + break; + } + + return expTileMode; +} + +static GpaError ComputeSurfaceInfoMicroTiled(const GpaTilingParams* pIn, ///< [in] Input structure + GpaSurfaceInfo* 
pOut, ///< [out] Output structure + uint32_t padDims, ///< [in] Dimensions to padd + GnmArrayMode expTileMode ///< [in] Expected tile mode +) { + uint32_t microTileThickness; + uint32_t expPitch = pIn->linearwidth; + uint32_t expHeight = pIn->linearheight; + uint32_t expNumSlices = pIn->lineardepth; + + // No 1D MSAA on real H/W, keep this for TGL + uint32_t numSamples = pIn->numfragsperpixel; + + // + // Compute the micro tile thickness. + // + microTileThickness = gpaGetMicroTileThickness(expTileMode); + + // + // Extra override for mip levels + // + if (pIn->miplevel > 0) { + // + // Reduce tiling mode from thick to thin if the number + // of slices is less than the micro tile thickness. + // + if ((expTileMode == GNM_ARRAY_1D_TILED_THICK) && (expNumSlices < ThickTileThickness)) { + expTileMode = HwlDegradeThickTileMode(GNM_ARRAY_1D_TILED_THICK, expNumSlices, NULL); + if (expTileMode != GNM_ARRAY_1D_TILED_THICK) { + microTileThickness = 1; + } + } + } + + // + // Compute the surface restrictions. + // + ComputeSurfaceAlignmentsMicroTiled(pIn->surfaceflags, &pOut->basealign, &pOut->pitchalign, + &pOut->heightalign); + + pOut->depthalign = microTileThickness; + + // + // Pad pitch and height to the required granularities. + // Compute surface size. + // Return parameters. 
+ // + PadDimensions(expTileMode, pIn->bitsperfrag, pIn->surfaceflags, numSamples, &pOut->tileinfo, + padDims, pIn->miplevel, &expPitch, &pOut->pitchalign, &expHeight, + pOut->heightalign, &expNumSlices, microTileThickness, pIn->mingpumode); + + // + // Get HWL specific pitch adjustment + // + uint64_t logicalSliceSize = + HwlGetSizeAdjustmentMicroTiled(pIn->bitsperfrag, numSamples, &expPitch, &expHeight); + + pOut->pitch = expPitch; + pOut->height = expHeight; + pOut->depth = expNumSlices; + + pOut->surfacesize = logicalSliceSize * expNumSlices; + + GpaError err = gpaAdjustTileMode(&pOut->tilemode, pIn->tilemode, expTileMode); + if (err != GPA_ERR_OK) { + return err; + } + + return GPA_ERR_OK; +} + +static GpaError ComputeSurfaceInfoMacroTiled(const GpaTilingParams* pIn, ///< [in] Input structure + GpaSurfaceInfo* pOut, ///< [out] Output structure + uint32_t padDims, ///< [in] Dimensions to padd + GnmArrayMode expTileMode ///< [in] Expected tile mode +) { + GnmArrayMode origTileMode = expTileMode; + uint32_t microTileThickness; + + uint32_t paddedPitch; + uint32_t paddedHeight; + uint64_t bytesPerSlice; + + uint32_t expPitch = pIn->linearwidth; + uint32_t expHeight = pIn->linearheight; + uint32_t expNumSlices = pIn->lineardepth; + + uint32_t numSamples = pIn->numfragsperpixel; + + // + // Compute the surface restrictions as base SanityCheckMacroTiled is + // called in ComputeSurfaceAlignmentsMacroTiled + // + GpaError err = ComputeSurfaceAlignmentsMacroTiled( + expTileMode, pIn->bitsperfrag, pIn->surfaceflags, pIn->miplevel, numSamples, pOut); + if (err != GPA_ERR_OK) { + return err; + } + + // + // Compute the micro tile thickness. 
+ // + microTileThickness = gpaGetMicroTileThickness(expTileMode); + + // + // Find the correct tiling mode for mip levels + // + if (pIn->miplevel > 0) { + // + // Try valid tile mode + // + expTileMode = ComputeSurfaceMipLevelTileMode( + expTileMode, pIn->bitsperfrag, expPitch, expHeight, expNumSlices, numSamples, + pOut->blockwidth, pOut->blockheight, &pOut->tileinfo); + + if (!gpaIsMacroTiled(expTileMode)) // Downgraded to micro-tiled + { + return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode); + } else if (microTileThickness != gpaGetMicroTileThickness(expTileMode)) { + // + // Re-compute if thickness changed since bank-height may + // be changed! + // + return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); + } + } + + paddedPitch = expPitch; + paddedHeight = expHeight; + + // + // Re-cal alignment + // + if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled + { + err = ComputeSurfaceAlignmentsMacroTiled(expTileMode, pIn->bitsperfrag, pIn->surfaceflags, + pIn->miplevel, numSamples, pOut); + if (err != GPA_ERR_OK) { + return err; + } + } + + // + // Do padding + // + PadDimensions(expTileMode, pIn->bitsperfrag, pIn->surfaceflags, numSamples, &pOut->tileinfo, + padDims, pIn->miplevel, &paddedPitch, &pOut->pitchalign, &paddedHeight, + pOut->heightalign, &expNumSlices, microTileThickness, pIn->mingpumode); + + pOut->pitch = paddedPitch; + pOut->height = paddedHeight; + pOut->depth = expNumSlices; + + // + // Compute the size of a slice. 
+ // + bytesPerSlice = BitsToBytes64((uint64_t)paddedPitch * paddedHeight * + NextPow2(pIn->bitsperfrag) * numSamples); + + pOut->surfacesize = bytesPerSlice * expNumSlices; + + err = gpaAdjustTileMode(&pOut->tilemode, pIn->tilemode, expTileMode); + if (err != GPA_ERR_OK) { + return err; + } + + pOut->depthalign = microTileThickness; + + return GPA_ERR_OK; +} + +static GpaError DispatchComputeSurfaceInfo(GpaSurfaceInfo* out, const GpaTilingParams* params) { + const GnmArrayMode arrayMode = gpaGetArrayMode(params->tilemode); + uint32_t bpp = params->bitsperfrag; + uint32_t numSamples = params->numfragsperpixel; + uint32_t numSlices = params->lineardepth; + uint32_t mipLevel = params->miplevel; + GpaSurfaceFlags flags = params->surfaceflags; + + uint32_t padDims = 0; + + // For macro tile mode, we should calculate default tiling + // parameters + GpaError err = + gpaGetTileInfo(&out->tileinfo, params->tilemode, bpp, numSamples, params->mingpumode); + if (err != GPA_ERR_OK) { + return err; + } + + if (flags.cube) { + if (mipLevel == 0) { + padDims = 2; + } + + if (numSlices == 1) { + // This is calculating one face, remove cube flag + flags.cube = 0; + } + } + + switch (arrayMode) { + case GNM_ARRAY_LINEAR_GENERAL: // fall through + case GNM_ARRAY_LINEAR_ALIGNED: + err = ComputeSurfaceInfoLinear(params, out, padDims); + break; + + case GNM_ARRAY_1D_TILED_THIN1: // fall through + case GNM_ARRAY_1D_TILED_THICK: + err = ComputeSurfaceInfoMicroTiled(params, out, padDims, arrayMode); + break; + + case GNM_ARRAY_2D_TILED_THIN1: // fall through + case GNM_ARRAY_2D_TILED_THICK: // fall through + case GNM_ARRAY_3D_TILED_THIN1: // fall through + case GNM_ARRAY_3D_TILED_THICK: // fall through + case GNM_ARRAY_2D_TILED_XTHICK: // fall through + case GNM_ARRAY_3D_TILED_XTHICK: // fall through + case GNM_ARRAY_PRT_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_2D_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_3D_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_TILED_THICK: 
// fall through + case GNM_ARRAY_PRT_2D_TILED_THICK: // fall through + case GNM_ARRAY_PRT_3D_TILED_THICK: + err = ComputeSurfaceInfoMacroTiled(params, out, padDims, arrayMode); + break; + + default: + return GPA_ERR_INTERNAL_ERROR; + } + + return err; +} + +GpaError gpaComputeSurfaceInfo(GpaSurfaceInfo* out, const GpaTilingParams* params) { + if (!out || !params) { + return GPA_ERR_INVALID_ARGS; + } + + // We suggest client do sanity check but a check here is also good + if (params->bitsperfrag > 128) { + return GPA_ERR_INVALID_ARGS; + } + + uint32_t bitsperelem = params->bitsperfrag; + + if (!IsPow2(bitsperelem) || bitsperelem < 1 || bitsperelem > 128) { + return GPA_ERR_INVALID_ARGS; + } + + const GnmArrayMode arraymode = gpaGetArrayMode(params->tilemode); + + // Thick modes don't support multisample + if (gpaGetMicroTileThickness(arraymode) > 1 && params->numfragsperpixel > 1) { + return GPA_ERR_INVALID_ARGS; + } + + // Do mipmap check first + // If format is BCn, pre-pad dimension to power-of-two according to + // HWL + uint32_t linearwidth = 0; + uint32_t linearheight = 0; + ComputeMipLevel(&linearwidth, &linearheight, params); + + if (params->numfragsperpixel > 1 && params->miplevel != 0) { + return GPA_ERR_INVALID_ARGS; + } + + // Get compression/expansion factors and element mode (which + // indicates compression/expansion + uint32_t basePitch = params->basetiledpitch; + uint32_t expandX = 1; + uint32_t expandY = 1; + /*if (params->isblockcompressed) { + // Evergreen family workaround + switch (bitsperelem) { + case 1: + expandX = 8; + bitsperelem = 8; + + // Not BCn format we still keep old way (FMT_1? 
No + // real test yet) + linearwidth = (linearwidth + expandX - 1) / expandX; + basePitch = (basePitch + expandX - 1) / expandX; + break; + case 4: + case 8: + expandX = 4; + expandY = 4; + bitsperelem *= 16; + + // For BCn we now pad it to POW2 at the + // beginning so it is safe to divide by + // 4 directly + linearwidth = (linearwidth + expandX - 1) / expandX; + linearheight = (linearheight + expandY - 1) / expandY; + basePitch = (basePitch + expandX - 1) / expandX; + break; + case 16: + return GPA_ERR_UNSUPPORTED; + default: + return GPA_ERR_INVALID_ARGS; + } + }*/ + + // Mipmap including level 0 must be pow2 padded since either SI + // hw expects so or it is required by CFX for Hw Compatibility + // between NI and SI. Otherwise it is only needed for mipLevel > + // 0. Any h/w has different requirement should implement its own + // virtual function + + uint32_t lineardepth = params->lineardepth; + if (params->surfaceflags.pow2pad) { + linearwidth = NextPow2(linearwidth); + linearheight = NextPow2(linearheight); + lineardepth = NextPow2(lineardepth); + } else if (params->miplevel > 0) { + linearwidth = NextPow2(linearwidth); + linearheight = NextPow2(linearheight); + + if (!params->surfaceflags.cube) { + lineardepth = NextPow2(lineardepth); + } + + // for cubemap, we keep its value at first + } + + GpaTilingParams tpcopy = *params; + tpcopy.linearwidth = linearwidth; + tpcopy.linearheight = linearheight; + tpcopy.lineardepth = lineardepth; + tpcopy.basetiledpitch = basePitch; + + GpaError err = DispatchComputeSurfaceInfo(out, &tpcopy); + if (err != GPA_ERR_OK) { + return err; + } + + // ElemLib::RestoreSurfaceInfo + out->height *= expandY; + out->pitchalign *= expandX; + out->heightalign *= expandY; + if (params->surfaceflags.pow2pad) { + out->pitch = NextPow2(out->pitch); + out->height = NextPow2(out->height); + out->depth = NextPow2(out->depth); + } + + return GPA_ERR_OK; +} + +static inline void HwlComputeTileDataWidthAndHeightLinear( + uint32_t* pMacroWidth, 
///< [out] macro tile width + uint32_t* pMacroHeight, ///< [out] macro tile height + GnmPipeConfig pipeConfig ///< [in] pipe configuration +) { + uint32_t numTiles = 0; + + switch (pipeConfig) { + case GNM_ADDR_SURF_P16_32x32_8x16: + case GNM_ADDR_SURF_P8_32x32_8x16: + numTiles = 8; + break; + default: + numTiles = 4; + break; + } + + *pMacroWidth = numTiles * MicroTileWidth; + *pMacroHeight = numTiles * MicroTileHeight; +} + +static inline void ComputeTileDataWidthAndHeight( + uint32_t bpp, ///< [in] bits per pixel + uint32_t cacheBits, ///< [in] bits of cache + GnmPipeConfig pipeConfig, ///< [in] pipe configuration + uint32_t* pMacroWidth, ///< [out] macro tile width + uint32_t* pMacroHeight ///< [out] macro tile height +) { + uint32_t height = 1; + uint32_t width = cacheBits / bpp; + uint32_t pipes = gpaGetPipeCount(pipeConfig); + + // Double height until the macro-tile is close to square + // Height can only be doubled if width is even + + while ((width > height * 2 * pipes) && !(width & 1)) { + width /= 2; + height *= 2; + } + + *pMacroWidth = 8 * width; + *pMacroHeight = 8 * height * pipes; + + // Note: The above iterative comptuation is equivalent to the + // following + // + // int log2_height = + // ((log2(cacheBits)-log2(bpp)-log2(pipes))/2); int macroHeight + // = pow2( 3+log2(pipes)+log2_height ); +} + +GpaError gpaComputeHtileInfo(GpaHtileInfo* outinfo, const GpaHtileParams* params) { + if (!outinfo || !params) { + return GPA_ERR_INVALID_ARGS; + } + + const uint32_t banks = 1 << params->banks; + const uint32_t pipes = gpaGetPipeCount(params->pipeconfig); + + if (params->flags.tccompatible) { + const uint32_t sliceSize = params->pitch * params->height * 4 / (8 * 8); + const uint32_t align = pipes * banks * PIPE_INTERLEAVE_BYTES; + + outinfo->macrowidth = 8 * 512 / params->bpp; // Align width to 512-bit memory accesses + outinfo->macroheight = 8 * pipes; // Align height to number of pipes + + outinfo->htilebytes = sliceSize; + + outinfo->pitch = 
params->pitch; + outinfo->height = params->height; + outinfo->basealign = align; + outinfo->macrowidth = 0; + outinfo->macroheight = 0; + outinfo->bpp = 32; + } else { + uint32_t macroWidth = 0; + uint32_t macroHeight = 0; + uint32_t baseAlign = 0; + uint64_t surfBytes = 0; + uint64_t sliceBytes = 0; + + const uint32_t numSlices = std::max(1u, params->numslices); + + const uint32_t bpp = 32; + const uint32_t cacheBits = HtileCacheBits; + + const bool islinear = gpaIsLinear(params->arraymode); + + if (islinear) { + HwlComputeTileDataWidthAndHeightLinear(¯oWidth, ¯oHeight, params->pipeconfig); + } else { + ComputeTileDataWidthAndHeight(bpp, cacheBits, params->pipeconfig, ¯oWidth, + ¯oHeight); + } + + outinfo->pitch = PowTwoAlign32(params->pitch, macroWidth); + outinfo->height = PowTwoAlign32(params->height, macroHeight); + + baseAlign = PIPE_INTERLEAVE_BYTES * pipes; + if (params->flags.tccompatible) { + baseAlign *= banks; + } + + outinfo->basealign = baseAlign; + + outinfo->macrowidth = macroWidth; + outinfo->macroheight = macroHeight; + + const uint64_t HtileCacheLineSize = BitsToBytes64(HtileCacheBits); + + sliceBytes = BitsToBytes64((uint64_t)outinfo->pitch * outinfo->height * bpp / 64); + + // Align the surfSize to htilecachelinesize * pipes at + // last + surfBytes = sliceBytes * numSlices; + surfBytes = PowTwoAlign32(surfBytes, HtileCacheLineSize * pipes); + + outinfo->htilebytes = surfBytes; + outinfo->slicebytes = sliceBytes; + + outinfo->bpp = params->bpp; + } + + return GPA_ERR_OK; +} + +static inline uint64_t ComputeCmaskBytes(uint32_t pitch, ///< [in] pitch + uint32_t height, ///< [in] height + uint32_t numSlices ///< [in] number of slices +) { + return BitsToBytes64((uint64_t)pitch * height * numSlices * CmaskElemBits) / MicroTilePixels; +} + +static inline uint32_t ComputeCmaskBaseAlign(const GpaTileInfo* pTileInfo, ///< [in] Tile info + bool tcCompatible ///< [in] should be shader readable +) { + uint32_t baseAlign = PIPE_INTERLEAVE_BYTES * 
gpaGetPipeCount(pTileInfo->pipeconfig); + + if (tcCompatible) { + baseAlign *= (1 << pTileInfo->banks); + } + + return baseAlign; +} + +GpaError gpaComputeCmaskInfo(GpaCmaskInfo* outinfo, const GpaCmaskParams* params) { + if (!outinfo || !params) { + return GPA_ERR_INVALID_ARGS; + } + + GpaTileInfo tileinfo = {}; + GpaError err = gpaGetTileInfo(&tileinfo, params->tilemode, params->bpp, params->numfrags, + params->mingpumode); + if (err != GPA_ERR_OK) { + return err; + } + + uint32_t macroWidth = 0; + uint32_t macroHeight = 0; + uint32_t baseAlign = 0; + uint64_t surfBytes = 0; + uint64_t sliceBytes = 0; + + uint32_t numSlices = std::max(1u, params->numslices); + + const uint32_t bpp = CmaskElemBits; + const uint32_t cacheBits = CmaskCacheBits; + const GnmArrayMode arrayMode = gpaGetArrayMode(params->tilemode); + const bool islinear = gpaIsLinear(arrayMode); + + if (islinear) { + HwlComputeTileDataWidthAndHeightLinear(¯oWidth, ¯oHeight, tileinfo.pipeconfig); + } else { + ComputeTileDataWidthAndHeight(bpp, cacheBits, tileinfo.pipeconfig, ¯oWidth, + ¯oHeight); + } + + outinfo->pitch = (params->pitch + macroWidth - 1) & ~(macroWidth - 1); + outinfo->height = (params->height + macroHeight - 1) & ~(macroHeight - 1); + + sliceBytes = ComputeCmaskBytes(outinfo->pitch, outinfo->height, 1); + + baseAlign = ComputeCmaskBaseAlign(&tileinfo, params->flags.tccompatible); + + while (sliceBytes % baseAlign) { + outinfo->height += macroHeight; + + sliceBytes = ComputeCmaskBytes(outinfo->pitch, outinfo->height, 1); + } + + surfBytes = sliceBytes * numSlices; + + outinfo->cmaskbytes = surfBytes; + + // + // Use SafeAssign since they are optional + // + outinfo->macrowidth = macroWidth; + outinfo->macroheight = macroHeight; + outinfo->basealign = baseAlign; + outinfo->slicebytes = sliceBytes; + + uint32_t slice = (outinfo->pitch) * (outinfo->height); + uint32_t blockMax = slice / 128 / 128 - 1; + + uint32_t maxBlockMax = 0x3FFF; // 14 bits, 0n16383 + + if (blockMax > maxBlockMax) { + 
blockMax = maxBlockMax; + return GPA_ERR_INVALID_ARGS; + } + + outinfo->blockmax = blockMax; + + return GPA_ERR_OK; +} + +GpaError gpaComputeFmaskInfo(GpaFmaskInfo* outinfo, const GpaFmaskParams* params) { + if (!outinfo || !params || params->numfrags <= 1) { + return GPA_ERR_INVALID_ARGS; + } + + const GpaTilingParams tp = { + .tilemode = params->tilemode, + .mingpumode = params->mingpumode, + + .linearwidth = params->pitch, + .linearheight = params->height, + .lineardepth = params->numslices, + .numfragsperpixel = params->numfrags, + .basetiledpitch = 0, + + .miplevel = 0, + .arrayslice = 0, + .surfaceflags = + { + .fmask = 1, + .texcompatible = params->mingpumode == GNM_GPU_NEO, + }, + .bitsperfrag = params->bpp, + .isblockcompressed = params->isblockcompressed, + }; + + GpaSurfaceInfo surfinfo = {0}; + GpaError err = gpaComputeSurfaceInfo(&surfinfo, &tp); + if (err != GPA_ERR_OK) { + return err; + } + + *outinfo = (GpaFmaskInfo){ + .pitch = surfinfo.pitch, + .height = surfinfo.height, + .basealign = surfinfo.basealign, + .pitchalign = surfinfo.pitchalign, + .heightalign = surfinfo.heightalign, + .bpp = surfinfo.bitsperelem, + .fmaskbytes = surfinfo.surfacesize * params->numslices, + .slicebytes = surfinfo.surfacesize, + }; + return GPA_ERR_OK; +} diff --git a/src/video_core/amdgpu/gpuaddr/surfgen.cpp b/src/video_core/amdgpu/gpuaddr/surfgen.cpp new file mode 100644 index 00000000..434fcc65 --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/surfgen.cpp @@ -0,0 +1,203 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h" + +GpaError gpaFindOptimalSurface(GpaSurfaceProperties* outprops, GpaSurfaceType surfacetype, + uint32_t bpp, uint32_t numfrags, bool mipmapped, + GnmGpuMode mingpumode) { + if (!outprops) { + return GPA_ERR_INVALID_ARGS; + } + + GpaSurfaceFlags flags = {0}; + switch (surfacetype) { + case GPA_SURFACE_COLORDISPLAY: + flags.display = 1; + break; + case 
GPA_SURFACE_COLOR: + break; + case GPA_SURFACE_DEPTHSTENCIL: + flags.depthtarget = 1; + flags.stenciltarget = 1; + break; + case GPA_SURFACE_DEPTH: + flags.depthtarget = 1; + break; + case GPA_SURFACE_STENCIL: + flags.stenciltarget = 1; + break; + case GPA_SURFACE_FMASK: + flags.fmask = 1; + break; + case GPA_SURFACE_TEXTUREFLAT: + case GPA_SURFACE_RWTEXTUREFLAT: + flags.pow2pad = mipmapped; + flags.texcompatible = mingpumode == GNM_GPU_NEO; + break; + case GPA_SURFACE_TEXTUREVOLUME: + case GPA_SURFACE_RWTEXTUREVOLUME: + flags.volume = 1; + flags.pow2pad = mipmapped; + flags.texcompatible = mingpumode == GNM_GPU_NEO; + break; + case GPA_SURFACE_TEXTURECUBEMAP: + case GPA_SURFACE_RWTEXTURECUBEMAP: + flags.cube = 1; + flags.pow2pad = mipmapped; + flags.texcompatible = mingpumode == GNM_GPU_NEO; + break; + default: + return GPA_ERR_INVALID_ARGS; + } + + /* Set the requested tiling mode. */ + GnmArrayMode arraymode = GNM_ARRAY_LINEAR_GENERAL; + switch (surfacetype) { + case GPA_SURFACE_COLORDISPLAY: + case GPA_SURFACE_COLOR: + case GPA_SURFACE_DEPTHSTENCIL: + case GPA_SURFACE_DEPTH: + case GPA_SURFACE_STENCIL: + case GPA_SURFACE_FMASK: + arraymode = flags.prt ? GNM_ARRAY_PRT_2D_TILED_THIN1 : GNM_ARRAY_2D_TILED_THIN1; + break; + case GPA_SURFACE_TEXTUREFLAT: + case GPA_SURFACE_RWTEXTUREFLAT: + case GPA_SURFACE_TEXTURECUBEMAP: + case GPA_SURFACE_RWTEXTURECUBEMAP: + /* MSAA requires 2D tiling. */ + if (flags.prt) { + arraymode = numfrags > 1 ? GNM_ARRAY_PRT_2D_TILED_THIN1 : GNM_ARRAY_PRT_TILED_THIN1; + } else { + arraymode = numfrags > 1 ? GNM_ARRAY_2D_TILED_THIN1 : GNM_ARRAY_1D_TILED_THIN1; + } + break; + case GPA_SURFACE_TEXTUREVOLUME: + case GPA_SURFACE_RWTEXTUREVOLUME: + arraymode = flags.prt ? GNM_ARRAY_PRT_TILED_THICK : GNM_ARRAY_1D_TILED_THICK; + break; + default: + return GPA_ERR_INVALID_ARGS; + } + + /* Set the micro tile type. 
*/ + GnmMicroTileMode microtilemode = GNM_SURF_THIN_MICRO_TILING; + if (flags.display) + microtilemode = GNM_SURF_DISPLAY_MICRO_TILING; + else if (flags.depthtarget || flags.stenciltarget) + microtilemode = GNM_SURF_DEPTH_MICRO_TILING; + + /* Find the tile mode type */ + GnmTileMode tilemode = GNM_TM_DEPTH_2D_THIN_64; + if (microtilemode == GNM_SURF_DEPTH_MICRO_TILING) { + const uint32_t tilesize = gpaGetMicroTileThickness(arraymode) * bpp * numfrags * + MICROTILE_SIZE * MICROTILE_SIZE / 8; + if (mingpumode == GNM_GPU_NEO && DRAM_ROW_SIZE < tilesize) { + flags.texcompatible = 0; + } + if (flags.depthtarget && flags.texcompatible) { + switch (tilesize) { + case 128: + tilemode = GNM_TM_DEPTH_2D_THIN_128; + break; + case 256: + tilemode = GNM_TM_DEPTH_2D_THIN_256; + break; + case 512: + tilemode = GNM_TM_DEPTH_2D_THIN_512; + break; + default: + tilemode = GNM_TM_DEPTH_2D_THIN_1K; + break; + } + } else { + switch (numfrags) { + case 1: + tilemode = GNM_TM_DEPTH_2D_THIN_64; + break; + case 2: + case 4: + tilemode = GNM_TM_DEPTH_2D_THIN_128; + break; + case 8: + tilemode = GNM_TM_DEPTH_2D_THIN_256; + break; + default: + return GPA_ERR_INVALID_ARGS; + } + } + + switch (arraymode) { + case GNM_ARRAY_1D_TILED_THIN1: + tilemode = GNM_TM_DEPTH_1D_THIN; + break; + case GNM_ARRAY_PRT_TILED_THIN1: + tilemode = GNM_TM_DEPTH_2D_THIN_PRT_256; + break; + default: + break; + } + + if (flags.depthtarget && !flags.stenciltarget && mingpumode == GNM_GPU_NEO && + tilemode < GNM_TM_DEPTH_2D_THIN_256) { + tilemode = GNM_TM_DEPTH_2D_THIN_256; + } + } else if (microtilemode == GNM_SURF_DISPLAY_MICRO_TILING) { + if (arraymode == GNM_ARRAY_1D_TILED_THIN1) { + tilemode = GNM_TM_DISPLAY_1D_THIN; + } else if (arraymode == GNM_ARRAY_2D_TILED_THIN1) { + tilemode = GNM_TM_DISPLAY_2D_THIN; + } else if (arraymode == GNM_ARRAY_PRT_TILED_THIN1) { + tilemode = GNM_TM_DISPLAY_THIN_PRT; + } else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) { + tilemode = GNM_TM_DISPLAY_2D_THIN_PRT; + } else { + tilemode 
= GNM_TM_DISPLAY_1D_THIN; + } + } else if (microtilemode == GNM_SURF_THIN_MICRO_TILING) { + if (arraymode == GNM_ARRAY_1D_TILED_THIN1) { + tilemode = GNM_TM_THIN_1D_THIN; + } else if (arraymode == GNM_ARRAY_2D_TILED_THIN1) { + tilemode = GNM_TM_THIN_2D_THIN; + } else if (arraymode == GNM_ARRAY_3D_TILED_THIN1) { + tilemode = GNM_TM_THIN_3D_THIN; + } else if (arraymode == GNM_ARRAY_PRT_TILED_THIN1) { + tilemode = GNM_TM_THIN_THIN_PRT; + } else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) { + tilemode = GNM_TM_THIN_2D_THIN_PRT; + } else if (arraymode == GNM_ARRAY_PRT_3D_TILED_THIN1) { + tilemode = GNM_TM_THIN_3D_THIN_PRT; + } else { + tilemode = GNM_TM_THIN_1D_THIN; + } + } else if (microtilemode == GNM_SURF_THICK_MICRO_TILING) { + if (arraymode == GNM_ARRAY_1D_TILED_THICK) { + tilemode = GNM_TM_THICK_1D_THICK; + } else if (arraymode == GNM_ARRAY_2D_TILED_THICK) { + tilemode = GNM_TM_THICK_2D_THICK; + } else if (arraymode == GNM_ARRAY_3D_TILED_THICK) { + tilemode = GNM_TM_THICK_3D_THICK; + } else if (arraymode == GNM_ARRAY_PRT_TILED_THICK) { + tilemode = GNM_TM_THICK_THICK_PRT; + } else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THICK) { + tilemode = GNM_TM_THICK_2D_THICK_PRT; + } else if (arraymode == GNM_ARRAY_PRT_3D_TILED_THICK) { + tilemode = GNM_TM_THICK_3D_THICK_PRT; + } else if (arraymode == GNM_ARRAY_2D_TILED_XTHICK) { + tilemode = GNM_TM_THICK_2D_XTHICK; + } else if (arraymode == GNM_ARRAY_3D_TILED_XTHICK) { + tilemode = GNM_TM_THICK_3D_XTHICK; + } else { + tilemode = GNM_TM_THICK_1D_THICK; + } + } else if (microtilemode == GNM_SURF_ROTATED_MICRO_TILING) { + return GPA_ERR_INTERNAL_ERROR; + } + + *outprops = (GpaSurfaceProperties){ + .tilemode = tilemode, + .flags = flags, + }; + return GPA_ERR_OK; +} diff --git a/src/video_core/amdgpu/gpuaddr/tilemodes.cpp b/src/video_core/amdgpu/gpuaddr/tilemodes.cpp new file mode 100644 index 00000000..5285df94 --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/tilemodes.cpp @@ -0,0 +1,815 @@ +// SPDX-FileCopyrightText: 
Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#include +#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h" + +GnmArrayMode gpaGetArrayMode(GnmTileMode tilemode) { + switch (tilemode) { + case GNM_TM_DEPTH_1D_THIN: + case GNM_TM_DISPLAY_1D_THIN: + case GNM_TM_THIN_1D_THIN: + return GNM_ARRAY_1D_TILED_THIN1; + case GNM_TM_DEPTH_2D_THIN_64: + case GNM_TM_DEPTH_2D_THIN_128: + case GNM_TM_DEPTH_2D_THIN_256: + case GNM_TM_DEPTH_2D_THIN_512: + case GNM_TM_DEPTH_2D_THIN_1K: + case GNM_TM_DISPLAY_2D_THIN: + case GNM_TM_THIN_2D_THIN: + return GNM_ARRAY_2D_TILED_THIN1; + case GNM_TM_DISPLAY_THIN_PRT: + case GNM_TM_THIN_THIN_PRT: + return GNM_ARRAY_PRT_TILED_THIN1; + case GNM_TM_DEPTH_2D_THIN_PRT_256: + case GNM_TM_DEPTH_2D_THIN_PRT_1K: + case GNM_TM_DISPLAY_2D_THIN_PRT: + case GNM_TM_THIN_2D_THIN_PRT: + return GNM_ARRAY_PRT_2D_TILED_THIN1; + case GNM_TM_THIN_3D_THIN: + case GNM_TM_THIN_3D_THIN_PRT: + return GNM_ARRAY_3D_TILED_THIN1; + case GNM_TM_THICK_1D_THICK: + return GNM_ARRAY_1D_TILED_THICK; + case GNM_TM_THICK_2D_THICK: + return GNM_ARRAY_2D_TILED_THICK; + case GNM_TM_THICK_3D_THICK: + return GNM_ARRAY_3D_TILED_THICK; + case GNM_TM_THICK_THICK_PRT: + return GNM_ARRAY_PRT_TILED_THICK; + case GNM_TM_THICK_2D_THICK_PRT: + return GNM_ARRAY_PRT_2D_TILED_THICK; + case GNM_TM_THICK_3D_THICK_PRT: + return GNM_ARRAY_PRT_3D_TILED_THICK; + case GNM_TM_THICK_2D_XTHICK: + return GNM_ARRAY_2D_TILED_XTHICK; + case GNM_TM_THICK_3D_XTHICK: + return GNM_ARRAY_3D_TILED_XTHICK; + case GNM_TM_DISPLAY_LINEAR_ALIGNED: + return GNM_ARRAY_LINEAR_ALIGNED; + case GNM_TM_DISPLAY_LINEAR_GENERAL: + return GNM_ARRAY_LINEAR_GENERAL; + default: + abort(); + } +} + +GnmMicroTileMode gpaGetMicroTileMode(GnmTileMode tilemode) { + switch (tilemode) { + case GNM_TM_DEPTH_2D_THIN_64: + case GNM_TM_DEPTH_2D_THIN_128: + case GNM_TM_DEPTH_2D_THIN_256: + case GNM_TM_DEPTH_2D_THIN_512: + case GNM_TM_DEPTH_2D_THIN_1K: + case GNM_TM_DEPTH_1D_THIN: + case GNM_TM_DEPTH_2D_THIN_PRT_256: + case 
GNM_TM_DEPTH_2D_THIN_PRT_1K: + return GNM_SURF_DEPTH_MICRO_TILING; + case GNM_TM_DISPLAY_LINEAR_ALIGNED: + case GNM_TM_DISPLAY_1D_THIN: + case GNM_TM_DISPLAY_2D_THIN: + case GNM_TM_DISPLAY_THIN_PRT: + case GNM_TM_DISPLAY_2D_THIN_PRT: + case GNM_TM_DISPLAY_LINEAR_GENERAL: + return GNM_SURF_DISPLAY_MICRO_TILING; + case GNM_TM_THIN_1D_THIN: + case GNM_TM_THIN_2D_THIN: + case GNM_TM_THIN_3D_THIN: + case GNM_TM_THIN_THIN_PRT: + case GNM_TM_THIN_2D_THIN_PRT: + case GNM_TM_THIN_3D_THIN_PRT: + return GNM_SURF_THIN_MICRO_TILING; + case GNM_TM_THICK_1D_THICK: + case GNM_TM_THICK_2D_THICK: + case GNM_TM_THICK_3D_THICK: + case GNM_TM_THICK_THICK_PRT: + case GNM_TM_THICK_2D_THICK_PRT: + case GNM_TM_THICK_3D_THICK_PRT: + case GNM_TM_THICK_2D_XTHICK: + case GNM_TM_THICK_3D_XTHICK: + return GNM_SURF_THICK_MICRO_TILING; + default: + abort(); + } +} + +GnmPipeConfig gpaGetPipeConfig(GnmTileMode tilemode) { + switch (tilemode) { + case GNM_TM_DEPTH_2D_THIN_64: + case GNM_TM_DEPTH_2D_THIN_128: + case GNM_TM_DEPTH_2D_THIN_256: + case GNM_TM_DEPTH_2D_THIN_512: + case GNM_TM_DEPTH_2D_THIN_1K: + case GNM_TM_DEPTH_1D_THIN: + case GNM_TM_DEPTH_2D_THIN_PRT_256: + case GNM_TM_DEPTH_2D_THIN_PRT_1K: + case GNM_TM_DISPLAY_LINEAR_ALIGNED: + case GNM_TM_DISPLAY_1D_THIN: + case GNM_TM_DISPLAY_2D_THIN: + case GNM_TM_DISPLAY_2D_THIN_PRT: + case GNM_TM_THIN_1D_THIN: + case GNM_TM_THIN_2D_THIN: + case GNM_TM_THIN_2D_THIN_PRT: + case GNM_TM_THIN_3D_THIN_PRT: + case GNM_TM_THICK_1D_THICK: + case GNM_TM_THICK_2D_THICK: + case GNM_TM_THICK_2D_THICK_PRT: + case GNM_TM_THICK_2D_XTHICK: + return GNM_ADDR_SURF_P8_32x32_16x16; + case GNM_TM_DISPLAY_THIN_PRT: + case GNM_TM_THIN_3D_THIN: + case GNM_TM_THIN_THIN_PRT: + case GNM_TM_THICK_3D_THICK: + case GNM_TM_THICK_THICK_PRT: + case GNM_TM_THICK_3D_THICK_PRT: + case GNM_TM_THICK_3D_XTHICK: + return GNM_ADDR_SURF_P8_32x32_8x16; + case GNM_TM_DISPLAY_LINEAR_GENERAL: + return GNM_ADDR_SURF_P2; + default: + abort(); + } +} + +GnmPipeConfig 
gpaGetAltPipeConfig(GnmTileMode tilemode) { + switch (tilemode) { + case GNM_TM_DEPTH_2D_THIN_64: + case GNM_TM_DEPTH_2D_THIN_128: + case GNM_TM_DEPTH_2D_THIN_256: + case GNM_TM_DEPTH_2D_THIN_512: + case GNM_TM_DEPTH_2D_THIN_1K: + case GNM_TM_DEPTH_1D_THIN: + case GNM_TM_DEPTH_2D_THIN_PRT_256: + case GNM_TM_DEPTH_2D_THIN_PRT_1K: + case GNM_TM_DISPLAY_LINEAR_ALIGNED: + case GNM_TM_DISPLAY_1D_THIN: + case GNM_TM_DISPLAY_2D_THIN: + case GNM_TM_DISPLAY_THIN_PRT: + case GNM_TM_DISPLAY_2D_THIN_PRT: + case GNM_TM_THIN_1D_THIN: + case GNM_TM_THIN_2D_THIN: + case GNM_TM_THIN_3D_THIN: + case GNM_TM_THIN_THIN_PRT: + case GNM_TM_THIN_2D_THIN_PRT: + case GNM_TM_THIN_3D_THIN_PRT: + case GNM_TM_THICK_1D_THICK: + case GNM_TM_THICK_2D_THICK: + case GNM_TM_THICK_3D_THICK: + case GNM_TM_THICK_THICK_PRT: + case GNM_TM_THICK_2D_THICK_PRT: + case GNM_TM_THICK_3D_THICK_PRT: + case GNM_TM_THICK_2D_XTHICK: + case GNM_TM_THICK_3D_XTHICK: + return GNM_ADDR_SURF_P16_32x32_8x16; + case GNM_TM_DISPLAY_LINEAR_GENERAL: + return GNM_ADDR_SURF_P2; + default: + abort(); + } +} + +GnmSampleSplit gpaGetSampleSplit(GnmTileMode tilemode) { + switch (tilemode) { + case GNM_TM_DEPTH_2D_THIN_64: + case GNM_TM_DEPTH_2D_THIN_128: + case GNM_TM_DEPTH_2D_THIN_256: + case GNM_TM_DEPTH_2D_THIN_512: + case GNM_TM_DEPTH_2D_THIN_1K: + case GNM_TM_DEPTH_1D_THIN: + case GNM_TM_DEPTH_2D_THIN_PRT_256: + case GNM_TM_DEPTH_2D_THIN_PRT_1K: + case GNM_TM_DISPLAY_LINEAR_ALIGNED: + case GNM_TM_DISPLAY_1D_THIN: + case GNM_TM_THIN_1D_THIN: + case GNM_TM_THICK_1D_THICK: + case GNM_TM_THICK_2D_THICK: + case GNM_TM_THICK_3D_THICK: + case GNM_TM_THICK_THICK_PRT: + case GNM_TM_THICK_2D_THICK_PRT: + case GNM_TM_THICK_3D_THICK_PRT: + case GNM_TM_THICK_2D_XTHICK: + case GNM_TM_THICK_3D_XTHICK: + case GNM_TM_DISPLAY_LINEAR_GENERAL: + return GNM_ADDR_SAMPLE_SPLIT_1; + case GNM_TM_DISPLAY_2D_THIN: + case GNM_TM_DISPLAY_THIN_PRT: + case GNM_TM_DISPLAY_2D_THIN_PRT: + case GNM_TM_THIN_2D_THIN: + case GNM_TM_THIN_3D_THIN: + case 
GNM_TM_THIN_THIN_PRT: + case GNM_TM_THIN_2D_THIN_PRT: + case GNM_TM_THIN_3D_THIN_PRT: + return GNM_ADDR_SAMPLE_SPLIT_2; + default: + abort(); + } +} + +GnmTileSplit gpaGetTileSplit(GnmTileMode tilemode) { + switch (tilemode) { + case GNM_TM_DEPTH_2D_THIN_64: + case GNM_TM_DEPTH_1D_THIN: + case GNM_TM_DISPLAY_LINEAR_ALIGNED: + case GNM_TM_DISPLAY_1D_THIN: + case GNM_TM_DISPLAY_2D_THIN: + case GNM_TM_DISPLAY_THIN_PRT: + case GNM_TM_DISPLAY_2D_THIN_PRT: + case GNM_TM_THIN_1D_THIN: + case GNM_TM_THIN_2D_THIN: + case GNM_TM_THIN_3D_THIN: + case GNM_TM_THIN_THIN_PRT: + case GNM_TM_THIN_2D_THIN_PRT: + case GNM_TM_THIN_3D_THIN_PRT: + case GNM_TM_THICK_1D_THICK: + case GNM_TM_THICK_2D_THICK: + case GNM_TM_THICK_3D_THICK: + case GNM_TM_THICK_THICK_PRT: + case GNM_TM_THICK_2D_THICK_PRT: + case GNM_TM_THICK_3D_THICK_PRT: + case GNM_TM_THICK_2D_XTHICK: + case GNM_TM_THICK_3D_XTHICK: + case GNM_TM_DISPLAY_LINEAR_GENERAL: + return GNM_SURF_TILE_SPLIT_64B; + case GNM_TM_DEPTH_2D_THIN_128: + return GNM_SURF_TILE_SPLIT_128B; + case GNM_TM_DEPTH_2D_THIN_256: + case GNM_TM_DEPTH_2D_THIN_PRT_256: + return GNM_SURF_TILE_SPLIT_256B; + case GNM_TM_DEPTH_2D_THIN_512: + return GNM_SURF_TILE_SPLIT_512B; + case GNM_TM_DEPTH_2D_THIN_1K: + case GNM_TM_DEPTH_2D_THIN_PRT_1K: + return GNM_SURF_TILE_SPLIT_1KB; + default: + abort(); + } +} + +GpaError gpaCalcSurfaceMacrotileMode(GnmMacroTileMode* outmtm, GnmTileMode tilemode, + uint32_t bitsperelem, uint32_t numfragsperpixel) { + if (!outmtm) { + return GPA_ERR_INVALID_ARGS; + } + if (!IsPow2(numfragsperpixel) || numfragsperpixel > 16) { + return GPA_ERR_INVALID_ARGS; + } + if (bitsperelem < 1 || bitsperelem > 128) { + return GPA_ERR_INVALID_ARGS; + } + + const GnmArrayMode arraymode = gpaGetArrayMode(tilemode); + if (!gpaIsMacroTiled(arraymode)) { + return GPA_ERR_INVALID_ARGS; + } + + const GnmMicroTileMode mtm = gpaGetMicroTileMode(tilemode); + const GnmSampleSplit samplesplithw = gpaGetSampleSplit(tilemode); + const GnmTileSplit tilesplithw = 
gpaGetTileSplit(tilemode); + + const uint32_t tilethickness = gpaGetMicroTileThickness(arraymode); + const uint32_t tilebytes1x = bitsperelem * MICROTILE_SIZE * MICROTILE_SIZE * tilethickness / 8; + const uint32_t samplesplit = 1 << samplesplithw; + const uint32_t colortilesplit = std::max(256U, samplesplit * tilebytes1x); + const uint32_t tilesplit = + (mtm == GNM_SURF_DEPTH_MICRO_TILING) ? (64u << tilesplithw) : colortilesplit; + const uint32_t tilesplic = std::min(DRAM_ROW_SIZE, tilesplit); + const uint32_t tilebytes = std::min(tilesplic, numfragsperpixel * tilebytes1x); + const uint32_t mtmidx = log2((uint32_t)(tilebytes / 64)); + + *outmtm = GnmMacroTileMode(gpaIsPrt(arraymode) ? (mtmidx + 8) : mtmidx); + return GPA_ERR_OK; +} + +GpaError gpaAdjustTileMode(GnmTileMode* outtilemode, GnmTileMode oldtilemode, + GnmArrayMode newarraymode) { + if (!outtilemode) { + return GPA_ERR_INVALID_ARGS; + } + + const GnmArrayMode oldarraymode = gpaGetArrayMode(oldtilemode); + if (newarraymode == oldarraymode) { + *outtilemode = oldtilemode; + return GPA_ERR_OK; + } + + const GnmMicroTileMode mtm = gpaGetMicroTileMode(oldtilemode); + switch (mtm) { + case GNM_SURF_DEPTH_MICRO_TILING: + if (newarraymode != GNM_ARRAY_1D_TILED_THIN1) { + return GPA_ERR_TILING_ERROR; + } + *outtilemode = GNM_TM_DEPTH_1D_THIN; + return GPA_ERR_OK; + case GNM_SURF_DISPLAY_MICRO_TILING: + if (newarraymode == GNM_ARRAY_1D_TILED_THIN1) { + *outtilemode = GNM_TM_DISPLAY_1D_THIN; + } else { + break; + } + return GPA_ERR_OK; + case GNM_SURF_THICK_MICRO_TILING: + if (newarraymode == GNM_ARRAY_3D_TILED_THICK) { + *outtilemode = GNM_TM_THICK_3D_THICK; + } else if (newarraymode == GNM_ARRAY_2D_TILED_THICK) { + *outtilemode = GNM_TM_THICK_2D_THICK; + } else if (newarraymode == GNM_ARRAY_1D_TILED_THICK) { + *outtilemode = GNM_TM_THICK_1D_THICK; + } else if (newarraymode == GNM_ARRAY_3D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_3D_THIN; + } else if (newarraymode == GNM_ARRAY_PRT_3D_TILED_THIN1) { + 
*outtilemode = GNM_TM_THIN_3D_THIN_PRT; + } else if (newarraymode == GNM_ARRAY_2D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_2D_THIN; + } else if (newarraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_2D_THIN_PRT; + } else if (newarraymode == GNM_ARRAY_PRT_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_THIN_PRT; + } else if (newarraymode == GNM_ARRAY_1D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_1D_THIN; + } else { + break; + } + return GPA_ERR_OK; + case GNM_SURF_THIN_MICRO_TILING: + if (newarraymode == GNM_ARRAY_3D_TILED_THICK) { + *outtilemode = GNM_TM_THICK_3D_THICK; + } else if (newarraymode == GNM_ARRAY_2D_TILED_THICK) { + *outtilemode = GNM_TM_THICK_2D_THICK; + } else if (newarraymode == GNM_ARRAY_1D_TILED_THICK) { + *outtilemode = GNM_TM_THICK_1D_THICK; + } else if (newarraymode == GNM_ARRAY_3D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_3D_THIN; + } else if (newarraymode == GNM_ARRAY_PRT_3D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_3D_THIN_PRT; + } else if (newarraymode == GNM_ARRAY_2D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_2D_THIN; + } else if (newarraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_2D_THIN_PRT; + } else if (newarraymode == GNM_ARRAY_PRT_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_THIN_PRT; + } else if (newarraymode == GNM_ARRAY_1D_TILED_THIN1) { + *outtilemode = GNM_TM_THIN_1D_THIN; + } else { + break; + } + return GPA_ERR_OK; + case GNM_SURF_ROTATED_MICRO_TILING: + default: + return GPA_ERR_INVALID_ARGS; + } + + return GPA_ERR_UNSUPPORTED; +} + +uint32_t gpaGetMicroTileThickness(GnmArrayMode arraymode) { + switch (arraymode) { + case GNM_ARRAY_LINEAR_GENERAL: + case GNM_ARRAY_LINEAR_ALIGNED: + case GNM_ARRAY_1D_TILED_THIN1: + case GNM_ARRAY_2D_TILED_THIN1: + case GNM_ARRAY_PRT_TILED_THIN1: + case GNM_ARRAY_PRT_2D_TILED_THIN1: + case GNM_ARRAY_PRT_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THIN1: + return 1; + case GNM_ARRAY_1D_TILED_THICK: + case GNM_ARRAY_2D_TILED_THICK: + case 
GNM_ARRAY_3D_TILED_THICK: + case GNM_ARRAY_PRT_TILED_THICK: + case GNM_ARRAY_PRT_2D_TILED_THICK: + case GNM_ARRAY_PRT_3D_TILED_THICK: + return 4; + case GNM_ARRAY_2D_TILED_XTHICK: + case GNM_ARRAY_3D_TILED_XTHICK: + return 8; + default: + abort(); + } +} + +bool gpaIsLinear(GnmArrayMode arraymode) { + switch (arraymode) { + case GNM_ARRAY_LINEAR_GENERAL: + case GNM_ARRAY_LINEAR_ALIGNED: + return true; + case GNM_ARRAY_1D_TILED_THIN1: + case GNM_ARRAY_1D_TILED_THICK: + case GNM_ARRAY_2D_TILED_THIN1: + case GNM_ARRAY_PRT_TILED_THIN1: + case GNM_ARRAY_PRT_2D_TILED_THIN1: + case GNM_ARRAY_2D_TILED_THICK: + case GNM_ARRAY_2D_TILED_XTHICK: + case GNM_ARRAY_PRT_TILED_THICK: + case GNM_ARRAY_PRT_2D_TILED_THICK: + case GNM_ARRAY_PRT_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THICK: + case GNM_ARRAY_3D_TILED_XTHICK: + case GNM_ARRAY_PRT_3D_TILED_THICK: + return false; + default: + abort(); + } +} + +bool gpaIsMicroTiled(GnmArrayMode arraymode) { + switch (arraymode) { + case GNM_ARRAY_1D_TILED_THIN1: + case GNM_ARRAY_1D_TILED_THICK: + return true; + case GNM_ARRAY_LINEAR_GENERAL: + case GNM_ARRAY_LINEAR_ALIGNED: + case GNM_ARRAY_2D_TILED_THIN1: + case GNM_ARRAY_PRT_TILED_THIN1: + case GNM_ARRAY_PRT_2D_TILED_THIN1: + case GNM_ARRAY_2D_TILED_THICK: + case GNM_ARRAY_2D_TILED_XTHICK: + case GNM_ARRAY_PRT_TILED_THICK: + case GNM_ARRAY_PRT_2D_TILED_THICK: + case GNM_ARRAY_PRT_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THICK: + case GNM_ARRAY_3D_TILED_XTHICK: + case GNM_ARRAY_PRT_3D_TILED_THICK: + return false; + default: + abort(); + } +} + +bool gpaIsMacroTiled(GnmArrayMode arraymode) { + switch (arraymode) { + case GNM_ARRAY_LINEAR_GENERAL: + case GNM_ARRAY_LINEAR_ALIGNED: + case GNM_ARRAY_1D_TILED_THIN1: + case GNM_ARRAY_1D_TILED_THICK: + return false; + case GNM_ARRAY_2D_TILED_THIN1: + case GNM_ARRAY_PRT_TILED_THIN1: + case GNM_ARRAY_PRT_2D_TILED_THIN1: + case GNM_ARRAY_2D_TILED_THICK: + case 
GNM_ARRAY_2D_TILED_XTHICK: + case GNM_ARRAY_PRT_TILED_THICK: + case GNM_ARRAY_PRT_2D_TILED_THICK: + case GNM_ARRAY_PRT_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THICK: + case GNM_ARRAY_3D_TILED_XTHICK: + case GNM_ARRAY_PRT_3D_TILED_THICK: + return true; + default: + abort(); + } +} + +static bool ismacrotiled3d(GnmArrayMode arraymode) { + switch (arraymode) { + case GNM_ARRAY_LINEAR_GENERAL: + case GNM_ARRAY_LINEAR_ALIGNED: + case GNM_ARRAY_1D_TILED_THIN1: + case GNM_ARRAY_1D_TILED_THICK: + case GNM_ARRAY_2D_TILED_THIN1: + case GNM_ARRAY_PRT_TILED_THIN1: + case GNM_ARRAY_PRT_2D_TILED_THIN1: + case GNM_ARRAY_2D_TILED_THICK: + case GNM_ARRAY_2D_TILED_XTHICK: + case GNM_ARRAY_PRT_TILED_THICK: + case GNM_ARRAY_PRT_2D_TILED_THICK: + return false; + case GNM_ARRAY_PRT_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THICK: + case GNM_ARRAY_3D_TILED_XTHICK: + case GNM_ARRAY_PRT_3D_TILED_THICK: + return true; + default: + abort(); + } +} + +bool gpaIsPrt(GnmArrayMode arraymode) { + switch (arraymode) { + case GNM_ARRAY_PRT_TILED_THIN1: + case GNM_ARRAY_PRT_TILED_THICK: + case GNM_ARRAY_PRT_2D_TILED_THIN1: + case GNM_ARRAY_PRT_2D_TILED_THICK: + case GNM_ARRAY_PRT_3D_TILED_THIN1: + case GNM_ARRAY_PRT_3D_TILED_THICK: + return true; + case GNM_ARRAY_LINEAR_GENERAL: + case GNM_ARRAY_LINEAR_ALIGNED: + case GNM_ARRAY_1D_TILED_THIN1: + case GNM_ARRAY_1D_TILED_THICK: + case GNM_ARRAY_2D_TILED_THIN1: + case GNM_ARRAY_2D_TILED_THICK: + case GNM_ARRAY_2D_TILED_XTHICK: + case GNM_ARRAY_3D_TILED_THIN1: + case GNM_ARRAY_3D_TILED_THICK: + case GNM_ARRAY_3D_TILED_XTHICK: + return false; + default: + abort(); + } +} + +// +// BASE mode macrotilemode stuff +// +GnmBankWidth gpaGetBankWidth(GnmMacroTileMode mtm) { + switch (mtm) { + case GNM_MACROTILEMODE_1x4_16: + case GNM_MACROTILEMODE_1x2_16: + case GNM_MACROTILEMODE_1x1_16: + case GNM_MACROTILEMODE_1x1_16_DUP: + case GNM_MACROTILEMODE_1x1_8: + case GNM_MACROTILEMODE_1x1_4: + case 
GNM_MACROTILEMODE_1x1_2: + case GNM_MACROTILEMODE_1x1_2_DUP: + case GNM_MACROTILEMODE_1x8_16: + case GNM_MACROTILEMODE_1x4_16_DUP: + case GNM_MACROTILEMODE_1x2_16_DUP: + case GNM_MACROTILEMODE_1x1_16_DUP2: + case GNM_MACROTILEMODE_1x1_8_DUP: + case GNM_MACROTILEMODE_1x1_4_DUP: + case GNM_MACROTILEMODE_1x1_2_DUP2: + case GNM_MACROTILEMODE_1x1_2_DUP3: + return GNM_SURF_BANK_WIDTH_1; + default: + abort(); + } +} + +GnmBankHeight gpaGetBankHeight(GnmMacroTileMode mtm) { + switch (mtm) { + case GNM_MACROTILEMODE_1x1_16: + case GNM_MACROTILEMODE_1x1_16_DUP: + case GNM_MACROTILEMODE_1x1_8: + case GNM_MACROTILEMODE_1x1_4: + case GNM_MACROTILEMODE_1x1_2: + case GNM_MACROTILEMODE_1x1_2_DUP: + case GNM_MACROTILEMODE_1x1_16_DUP2: + case GNM_MACROTILEMODE_1x1_8_DUP: + case GNM_MACROTILEMODE_1x1_4_DUP: + case GNM_MACROTILEMODE_1x1_2_DUP2: + case GNM_MACROTILEMODE_1x1_2_DUP3: + return GNM_SURF_BANK_HEIGHT_1; + case GNM_MACROTILEMODE_1x2_16: + case GNM_MACROTILEMODE_1x2_16_DUP: + return GNM_SURF_BANK_HEIGHT_2; + case GNM_MACROTILEMODE_1x4_16: + case GNM_MACROTILEMODE_1x4_16_DUP: + return GNM_SURF_BANK_HEIGHT_4; + case GNM_MACROTILEMODE_1x8_16: + return GNM_SURF_BANK_HEIGHT_8; + default: + abort(); + } +} + +GnmNumBanks gpaGetNumBanks(GnmMacroTileMode mtm) { + switch (mtm) { + case GNM_MACROTILEMODE_1x1_2: + case GNM_MACROTILEMODE_1x1_2_DUP: + case GNM_MACROTILEMODE_1x1_2_DUP2: + case GNM_MACROTILEMODE_1x1_2_DUP3: + return GNM_SURF_2_BANK; + case GNM_MACROTILEMODE_1x1_4: + case GNM_MACROTILEMODE_1x1_4_DUP: + return GNM_SURF_4_BANK; + case GNM_MACROTILEMODE_1x1_8: + case GNM_MACROTILEMODE_1x1_8_DUP: + return GNM_SURF_8_BANK; + case GNM_MACROTILEMODE_1x4_16: + case GNM_MACROTILEMODE_1x2_16: + case GNM_MACROTILEMODE_1x1_16: + case GNM_MACROTILEMODE_1x1_16_DUP: + case GNM_MACROTILEMODE_1x8_16: + case GNM_MACROTILEMODE_1x4_16_DUP: + case GNM_MACROTILEMODE_1x2_16_DUP: + case GNM_MACROTILEMODE_1x1_16_DUP2: + return GNM_SURF_16_BANK; + default: + abort(); + } +} + +GnmMacroTileAspect 
gpaGetMacrotileAspect(GnmMacroTileMode mtm) { + switch (mtm) { + case GNM_MACROTILEMODE_1x1_8: + case GNM_MACROTILEMODE_1x1_4: + case GNM_MACROTILEMODE_1x1_2: + case GNM_MACROTILEMODE_1x1_2_DUP: + case GNM_MACROTILEMODE_1x1_8_DUP: + case GNM_MACROTILEMODE_1x1_4_DUP: + case GNM_MACROTILEMODE_1x1_2_DUP2: + case GNM_MACROTILEMODE_1x1_2_DUP3: + return GNM_SURF_MACRO_ASPECT_1; + case GNM_MACROTILEMODE_1x2_16: + case GNM_MACROTILEMODE_1x1_16: + case GNM_MACROTILEMODE_1x1_16_DUP: + case GNM_MACROTILEMODE_1x2_16_DUP: + case GNM_MACROTILEMODE_1x1_16_DUP2: + return GNM_SURF_MACRO_ASPECT_2; + case GNM_MACROTILEMODE_1x4_16: + case GNM_MACROTILEMODE_1x8_16: + case GNM_MACROTILEMODE_1x4_16_DUP: + return GNM_SURF_MACRO_ASPECT_4; + default: + abort(); + } +} + +// +// NEO mode macrotilemode stuff +// +GnmBankHeight gpaGetAltBankHeight(GnmMacroTileMode mtm) { + switch (mtm) { + case GNM_MACROTILEMODE_1x1_8: + case GNM_MACROTILEMODE_1x1_4: + case GNM_MACROTILEMODE_1x1_2: + case GNM_MACROTILEMODE_1x1_2_DUP: + case GNM_MACROTILEMODE_1x1_16_DUP2: + case GNM_MACROTILEMODE_1x1_8_DUP: + case GNM_MACROTILEMODE_1x1_4_DUP: + case GNM_MACROTILEMODE_1x1_2_DUP2: + case GNM_MACROTILEMODE_1x1_2_DUP3: + return GNM_SURF_BANK_HEIGHT_1; + case GNM_MACROTILEMODE_1x1_16: + case GNM_MACROTILEMODE_1x1_16_DUP: + case GNM_MACROTILEMODE_1x2_16_DUP: + return GNM_SURF_BANK_HEIGHT_2; + case GNM_MACROTILEMODE_1x4_16: + case GNM_MACROTILEMODE_1x2_16: + case GNM_MACROTILEMODE_1x8_16: + case GNM_MACROTILEMODE_1x4_16_DUP: + return GNM_SURF_BANK_HEIGHT_4; + default: + abort(); + } +} + +GnmNumBanks gpaGetAltNumBanks(GnmMacroTileMode mtm) { + switch (mtm) { + case GNM_MACROTILEMODE_1x1_2_DUP: + case GNM_MACROTILEMODE_1x1_2_DUP2: + case GNM_MACROTILEMODE_1x1_2_DUP3: + return GNM_SURF_2_BANK; + case GNM_MACROTILEMODE_1x1_2: + case GNM_MACROTILEMODE_1x1_8_DUP: + case GNM_MACROTILEMODE_1x1_4_DUP: + return GNM_SURF_4_BANK; + case GNM_MACROTILEMODE_1x4_16: + case GNM_MACROTILEMODE_1x2_16: + case GNM_MACROTILEMODE_1x1_16: + 
case GNM_MACROTILEMODE_1x1_16_DUP: + case GNM_MACROTILEMODE_1x1_8: + case GNM_MACROTILEMODE_1x1_4: + case GNM_MACROTILEMODE_1x4_16_DUP: + case GNM_MACROTILEMODE_1x2_16_DUP: + case GNM_MACROTILEMODE_1x1_16_DUP2: + return GNM_SURF_8_BANK; + case GNM_MACROTILEMODE_1x8_16: + return GNM_SURF_16_BANK; + default: + abort(); + } +} + +GnmMacroTileAspect gpaGetAltMacrotileAspect(GnmMacroTileMode mtm) { + switch (mtm) { + case GNM_MACROTILEMODE_1x1_16: + case GNM_MACROTILEMODE_1x1_16_DUP: + case GNM_MACROTILEMODE_1x1_8: + case GNM_MACROTILEMODE_1x1_4: + case GNM_MACROTILEMODE_1x1_2: + case GNM_MACROTILEMODE_1x1_2_DUP: + case GNM_MACROTILEMODE_1x2_16_DUP: + case GNM_MACROTILEMODE_1x1_16_DUP2: + case GNM_MACROTILEMODE_1x1_8_DUP: + case GNM_MACROTILEMODE_1x1_4_DUP: + case GNM_MACROTILEMODE_1x1_2_DUP2: + case GNM_MACROTILEMODE_1x1_2_DUP3: + return GNM_SURF_MACRO_ASPECT_1; + case GNM_MACROTILEMODE_1x4_16: + case GNM_MACROTILEMODE_1x2_16: + case GNM_MACROTILEMODE_1x8_16: + case GNM_MACROTILEMODE_1x4_16_DUP: + return GNM_SURF_MACRO_ASPECT_2; + default: + abort(); + } +} + +uint32_t gpaGetPipeCount(GnmPipeConfig pipecfg) { + switch (pipecfg) { + case GNM_ADDR_SURF_P2: + return 2; + case GNM_ADDR_SURF_P8_32x32_8x16: + case GNM_ADDR_SURF_P8_32x32_16x16: + return 8; + case GNM_ADDR_SURF_P16_32x32_8x16: + return 16; + default: + abort(); + } +} + +GpaError gpaGetTileInfo(GpaTileInfo* outinfo, GnmTileMode tilemode, uint32_t bpp, uint32_t numfrags, + GnmGpuMode gpumode) { + if (!outinfo || tilemode < GNM_TM_DEPTH_2D_THIN_64 || + tilemode > GNM_TM_DISPLAY_LINEAR_GENERAL) { + return GPA_ERR_INVALID_ARGS; + } + + const GnmArrayMode arraymode = gpaGetArrayMode(tilemode); + + GnmNumBanks banks = GNM_SURF_2_BANK; + GnmBankWidth bankw = GNM_SURF_BANK_WIDTH_1; + GnmBankHeight bankh = GNM_SURF_BANK_HEIGHT_1; + GnmMacroTileAspect macroaspect = GNM_SURF_MACRO_ASPECT_1; + const GnmTileSplit tilesplit = gpaGetTileSplit(tilemode); + const GnmPipeConfig pipeconfig = + gpumode == GNM_GPU_NEO ? 
gpaGetAltPipeConfig(tilemode) : gpaGetPipeConfig(tilemode); + + if (gpaIsMacroTiled(arraymode)) { + GnmMacroTileMode macrotilemode = GNM_MACROTILEMODE_1x1_2; + GpaError err = gpaCalcSurfaceMacrotileMode(&macrotilemode, tilemode, bpp, numfrags); + if (err != GPA_ERR_OK) { + return err; + } + + if (gpumode == GNM_GPU_NEO) { + banks = gpaGetAltNumBanks(macrotilemode); + bankh = gpaGetAltBankHeight(macrotilemode); + macroaspect = gpaGetAltMacrotileAspect(macrotilemode); + } else { + banks = gpaGetNumBanks(macrotilemode); + bankh = gpaGetBankHeight(macrotilemode); + macroaspect = gpaGetMacrotileAspect(macrotilemode); + } + + bankw = gpaGetBankWidth(macrotilemode); + } + + *outinfo = (GpaTileInfo){ + .arraymode = arraymode, + .banks = banks, + .bankwidth = bankw, + .bankheight = bankh, + .macroaspectratio = macroaspect, + .tilesplit = tilesplit, + .pipeconfig = pipeconfig, + }; + return GPA_ERR_OK; +} + +static uint32_t GetBankPipeSwizzle(uint32_t bankSwizzle, uint32_t pipeSwizzle, uint64_t baseAddr, + const GpaTileInfo* tileinfo) { + const uint32_t numPipes = gpaGetPipeCount(tileinfo->pipeconfig); + const uint32_t pipeBits = QLog2(numPipes); + const uint32_t bankInterleaveBits = QLog2(BANK_INTERLEAVE); + const uint32_t tileSwizzle = pipeSwizzle + ((bankSwizzle << bankInterleaveBits) << pipeBits); + + baseAddr ^= tileSwizzle * PIPE_INTERLEAVE_BYTES; + baseAddr >>= 8; + + return (uint32_t)baseAddr; +} + +GpaError gpaComputeBaseSwizzle(uint32_t* outswizzle, GnmTileMode tilemode, uint32_t surfindex, + uint32_t bpp, uint32_t numfrags, GnmGpuMode gpumode) { + if (!outswizzle) { + return GPA_ERR_INVALID_ARGS; + } + + GpaTileInfo tileinfo = {}; + GpaError err = gpaGetTileInfo(&tileinfo, tilemode, bpp, numfrags, gpumode); + if (err != GPA_ERR_OK) { + return err; + } + + if (!gpaIsMacroTiled(tileinfo.arraymode)) { + *outswizzle = 0; + return GPA_ERR_OK; + } + + /// This is a legacy misreading of h/w doc, use it as it doesn't hurt.
+ static const uint8_t bankRotationArray[4][16] = { + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SURF_2_BANK + {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SURF_4_BANK + {0, 3, 6, 1, 4, 7, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SURF_8_BANK + {0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9}, // ADDR_SURF_16_BANK + }; + + const uint32_t numBanks = 2 << tileinfo.banks; + const uint32_t numPipes = gpaGetPipeCount(tileinfo.pipeconfig); + + const uint32_t bankSwizzle = bankRotationArray[tileinfo.banks][surfindex & (numBanks - 1)]; + uint32_t pipeswizzle = 0; + if (ismacrotiled3d(tileinfo.arraymode)) { + pipeswizzle = surfindex & (numPipes - 1); + } + + *outswizzle = GetBankPipeSwizzle(bankSwizzle, pipeswizzle, 0, &tileinfo); + return GPA_ERR_OK; +} diff --git a/src/video_core/amdgpu/gpuaddr/tiler.cpp b/src/video_core/amdgpu/gpuaddr/tiler.cpp new file mode 100644 index 00000000..e1d9f77f --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/tiler.cpp @@ -0,0 +1,1287 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#include +#include +#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h" + +typedef struct { + GnmGpuMode mingpumode; + GnmTileMode tilemode; + GpaTileInfo tileinfo; + + uint32_t linearwidth; + uint32_t linearheight; + uint32_t lineardepth; + uint32_t paddedwidth; + uint32_t paddedheight; + uint32_t paddeddepth; + + uint32_t bitsperelement; + uint32_t numfragsperpixel; + + uint32_t bankswizzlemask; + uint32_t pipeswizzlemask; +} GpaTilerContext; + +static GpaError createtilerctx(GpaTilerContext* ctx, size_t outsurfsize, size_t insurfsize, + const GpaTilingParams* tp) { + GpaSurfaceInfo surfinfo = {0}; + GpaError err = gpaComputeSurfaceInfo(&surfinfo, tp); + if (err != GPA_ERR_OK) { + return err; + } + + // check if buffers are large enough + if (surfinfo.surfacesize > insurfsize || surfinfo.surfacesize > outsurfsize) { + return GPA_ERR_OVERFLOW; + } + + GpaTileInfo tileinfo = {}; + 
err = gpaGetTileInfo(&tileinfo, tp->tilemode, tp->bitsperfrag, tp->numfragsperpixel, + tp->mingpumode); + if (err != GPA_ERR_OK) { + return err; + } + + *ctx = (GpaTilerContext){ + .mingpumode = tp->mingpumode, + .tilemode = tp->tilemode, + .tileinfo = tileinfo, + + .linearwidth = tp->linearwidth, + .linearheight = tp->linearheight, + .lineardepth = tp->lineardepth, + .paddedwidth = surfinfo.pitch, + .paddedheight = surfinfo.height, + .paddeddepth = surfinfo.depth, + + .bitsperelement = tp->bitsperfrag, + .numfragsperpixel = tp->numfragsperpixel, + + // TODO: calc swizzle? + .bankswizzlemask = 0, + .pipeswizzlemask = 0, + }; + + // TODO: why is this here? + // BC7 tests fail if this isn't here, + // but shouldn't this be handled by something else already? + if (!gpaIsLinear(surfinfo.tileinfo.arraymode) && tp->isblockcompressed) { + switch (tp->bitsperfrag) { + case 1: + ctx->bitsperelement *= 8; + ctx->linearwidth = std::max((ctx->linearwidth + 7) / 8, 1U); + ctx->paddedwidth = std::max((ctx->paddedwidth + 7) / 8, 1U); + break; + case 4: + case 8: + ctx->bitsperelement *= 16; + ctx->linearwidth = std::max((ctx->linearwidth + 3) / 4, 1U); + ctx->linearheight = std::max((ctx->linearheight + 3) / 4, 1U); + ctx->paddedwidth = std::max((ctx->paddedwidth + 3) / 4, 1U); + ctx->paddedheight = std::max((ctx->paddedheight + 3) / 4, 1U); + break; + case 16: + default: + return GPA_ERR_UNSUPPORTED; + } + } + + return GPA_ERR_OK; +} + +static GpaError ComputePixelIndexWithinMicroTile( + uint32_t* outIndex, + uint32_t x, ///< [in] x coord + uint32_t y, ///< [in] y coord + uint32_t z, ///< [in] slice/depth index + uint32_t bpp, ///< [in] bits per pixel + GnmArrayMode arrayMode, ///< [in] tile mode + GnmMicroTileMode microTileType ///< [in] pixel order in display/non-display mode +) { + uint32_t pixelBit0 = 0; + uint32_t pixelBit1 = 0; + uint32_t pixelBit2 = 0; + uint32_t pixelBit3 = 0; + uint32_t pixelBit4 = 0; + uint32_t pixelBit5 = 0; + uint32_t pixelBit6 = 0; + uint32_t
pixelBit7 = 0; + uint32_t pixelBit8 = 0; + + const uint32_t x0 = (x >> 0) & 1; + const uint32_t x1 = (x >> 1) & 1; + const uint32_t x2 = (x >> 2) & 1; + const uint32_t y0 = (y >> 0) & 1; + const uint32_t y1 = (y >> 1) & 1; + const uint32_t y2 = (y >> 2) & 1; + const uint32_t z0 = (z >> 0) & 1; + const uint32_t z1 = (z >> 1) & 1; + const uint32_t z2 = (z >> 2) & 1; + + const uint32_t thickness = gpaGetMicroTileThickness(arrayMode); + + // Compute the pixel number within the micro tile. + + if (microTileType != GNM_SURF_THICK_MICRO_TILING) { + if (microTileType == GNM_SURF_DISPLAY_MICRO_TILING) { + switch (bpp) { + case 8: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y1; + pixelBit4 = y0; + pixelBit5 = y2; + break; + case 16: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y0; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = y0; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 64: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 128: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + default: + return GPA_ERR_INTERNAL_ERROR; + } + } else if (microTileType != GNM_SURF_DISPLAY_MICRO_TILING || + microTileType == GNM_SURF_DEPTH_MICRO_TILING) { + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = x2; + pixelBit5 = y2; + } else if (microTileType == GNM_SURF_ROTATED_MICRO_TILING) { + if (thickness != 1) { + return GPA_ERR_INTERNAL_ERROR; + } + + switch (bpp) { + case 8: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x1; + pixelBit4 = x0; + pixelBit5 = x2; + break; + case 16: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x0; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 32: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = x0; + 
pixelBit3 = y2; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 64: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = y1; + pixelBit3 = x1; + pixelBit4 = x2; + pixelBit5 = y2; + break; + default: + return GPA_ERR_INTERNAL_ERROR; + } + } + + if (thickness > 1) { + pixelBit6 = z0; + pixelBit7 = z1; + } + } else // ADDR_THICK + { + if (thickness <= 1) { + return GPA_ERR_INTERNAL_ERROR; + } + + switch (bpp) { + case 8: + case 16: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = z0; + pixelBit5 = z1; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = z0; + pixelBit4 = y1; + pixelBit5 = z1; + break; + case 64: + case 128: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = z0; + pixelBit3 = x1; + pixelBit4 = y1; + pixelBit5 = z1; + break; + default: + return GPA_ERR_INTERNAL_ERROR; + } + + pixelBit6 = x2; + pixelBit7 = y2; + } + + if (thickness == 8) { + pixelBit8 = z2; + } + + *outIndex = + ((pixelBit0) | (pixelBit1 << 1) | (pixelBit2 << 2) | (pixelBit3 << 3) | (pixelBit4 << 4) | + (pixelBit5 << 5) | (pixelBit6 << 6) | (pixelBit7 << 7) | (pixelBit8 << 8)); + return GPA_ERR_OK; +} + +static uint64_t ComputeSurfaceAddrFromCoordLinear( + uint32_t x, ///< [in] x coord + uint32_t y, ///< [in] y coord + uint32_t slice, ///< [in] slice/depth index + uint32_t sample, ///< [in] sample index + uint32_t bpp, ///< [in] bits per pixel + uint32_t pitch, ///< [in] pitch + uint32_t height, ///< [in] height + uint32_t numSlices, ///< [in] number of slices + uint32_t* pBitPosition ///< [out] bit position inside a byte +) { + const uint64_t sliceSize = (uint64_t)pitch * height; + + uint64_t sliceOffset = (slice + sample * numSlices) * sliceSize; + uint64_t rowOffset = (uint64_t)y * pitch; + uint64_t pixOffset = x; + + uint64_t addr = (sliceOffset + rowOffset + pixOffset) * bpp; + + if (pBitPosition) { + *pBitPosition = (uint32_t)(addr % 8); + } + addr /= 8; + + return addr; +} + +static GpaError 
ComputeSurfaceAddrFromCoordMicroTiled( + uint32_t x, ///< [in] x coordinate + uint32_t y, ///< [in] y coordinate + uint32_t slice, ///< [in] slice index + uint32_t sample, ///< [in] sample index + uint32_t bpp, ///< [in] bits per pixel + uint32_t pitch, ///< [in] pitch, in pixels + uint32_t height, ///< [in] height, in pixels + uint32_t numSamples, ///< [in] number of samples + GnmArrayMode arrayMode, ///< [in] tile mode + GnmMicroTileMode microTileType, ///< [in] micro tiling type + bool isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used + uint64_t* outAddr, ///< [out] byte position + uint32_t* pBitPosition ///< [out] bit position, e.g. FMT_1 will use this +) { + uint32_t microTileBytes; + uint64_t sliceBytes; + uint32_t microTilesPerRow; + uint32_t microTileIndexX; + uint32_t microTileIndexY; + uint32_t microTileIndexZ; + uint64_t sliceOffset; + uint64_t microTileOffset; + uint32_t sampleOffset; + uint32_t pixelIndex; + uint32_t pixelOffset; + + const uint32_t microTileThickness = gpaGetMicroTileThickness(arrayMode); + + // + // Compute the micro tile size. + // + microTileBytes = BitsToBytes32(MicroTilePixels * microTileThickness * bpp * numSamples); + + // + // Compute the slice size. + // + sliceBytes = BitsToBytes64((uint64_t)pitch * height * microTileThickness * bpp * numSamples); + + // + // Compute the number of micro tiles per row. + // + microTilesPerRow = pitch / MicroTileWidth; + + // + // Compute the micro tile index. + // + microTileIndexX = x / MicroTileWidth; + microTileIndexY = y / MicroTileHeight; + microTileIndexZ = slice / microTileThickness; + + // + // Compute the slice offset. + // + sliceOffset = (uint64_t)microTileIndexZ * sliceBytes; + + // + // Compute the offset to the micro tile containing the specified + // coordinate. + // + microTileOffset = + ((uint64_t)microTileIndexY * microTilesPerRow + microTileIndexX) * microTileBytes; + + // + // Compute the pixel index within the micro tile. 
+ // + GpaError err = + ComputePixelIndexWithinMicroTile(&pixelIndex, x, y, slice, bpp, arrayMode, microTileType); + if (err != GPA_ERR_OK) { + return err; + } + + // Compute the sample offset. + // + if (isDepthSampleOrder) { + // + // For depth surfaces, samples are stored contiguously + // for each element, so the sample offset is the sample + // number times the element size. + // + sampleOffset = sample * bpp; + pixelOffset = pixelIndex * bpp * numSamples; + } else { + // + // For color surfaces, all elements for a particular + // sample are stored contiguously, so the sample offset + // is the sample number times the micro tile size + // divided by the number of samples. + // + sampleOffset = sample * (microTileBytes * 8 / numSamples); + pixelOffset = pixelIndex * bpp; + } + + // + // Compute the bit position of the pixel. Each element is + // stored with one bit per sample. + // + + uint32_t elemOffset = sampleOffset + pixelOffset; + + *pBitPosition = elemOffset % 8; + elemOffset /= 8; + + // + // Combine the slice offset, micro tile offset, sample offset, + // and pixel offsets.
+ // + *outAddr = sliceOffset + microTileOffset + elemOffset; + + return GPA_ERR_OK; +} + +static GpaError ComputePipeFromCoord(uint32_t x, ///< [in] x coordinate + uint32_t y, ///< [in] y coordinate + uint32_t slice, ///< [in] slice index + GnmArrayMode arrayMode, ///< [in] tile mode + uint32_t pipeSwizzle, ///< [in] pipe swizzle + const GpaTileInfo* pTileInfo, ///< [in] Tile info + uint32_t* outCoord ///< [out] resulting coordinate +) { + uint32_t pipe; + uint32_t pipeBit0 = 0; + uint32_t pipeBit1 = 0; + uint32_t pipeBit2 = 0; + uint32_t pipeBit3 = 0; + uint32_t sliceRotation; + uint32_t numPipes = 0; + + const uint32_t tx = x / MicroTileWidth; + const uint32_t ty = y / MicroTileHeight; + const uint32_t x3 = (tx >> 0) & 1; + const uint32_t x4 = (tx >> 1) & 1; + const uint32_t x5 = (tx >> 2) & 1; + const uint32_t x6 = (tx >> 3) & 1; + const uint32_t y3 = (ty >> 0) & 1; + const uint32_t y4 = (ty >> 1) & 1; + const uint32_t y5 = (ty >> 2) & 1; + const uint32_t y6 = (ty >> 3) & 1; + + switch (pTileInfo->pipeconfig) { + case GNM_ADDR_SURF_P2: + pipeBit0 = x3 ^ y3; + numPipes = 2; + break; + case GNM_ADDR_SURF_P8_32x32_8x16: + pipeBit0 = x4 ^ y3 ^ x5; + pipeBit1 = x3 ^ y4; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case GNM_ADDR_SURF_P8_32x32_16x16: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y4; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case GNM_ADDR_SURF_P16_32x32_8x16: + pipeBit0 = x4 ^ y3; + pipeBit1 = x3 ^ y4; + pipeBit2 = x5 ^ y6; + pipeBit3 = x6 ^ y5; + numPipes = 16; + break; + default: + return GPA_ERR_UNSUPPORTED; + } + + pipe = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3); + + const uint32_t microTileThickness = gpaGetMicroTileThickness(arrayMode); + + // + // Apply pipe rotation for the slice. 
+ // + switch (arrayMode) { + case GNM_ARRAY_3D_TILED_THIN1: // fall through thin + case GNM_ARRAY_3D_TILED_THICK: // fall through thick + case GNM_ARRAY_3D_TILED_XTHICK: + sliceRotation = std::max(1, (int32_t)(numPipes / 2) - 1) * (slice / microTileThickness); + break; + default: + sliceRotation = 0; + break; + } + pipeSwizzle += sliceRotation; + pipeSwizzle &= (numPipes - 1); + + *outCoord = pipe ^ pipeSwizzle; + return GPA_ERR_OK; +} + +static GpaError ComputeBankFromCoord( + uint32_t x, ///< [in] x coordinate + uint32_t y, ///< [in] y coordinate + uint32_t slice, ///< [in] slice index + GnmArrayMode arrayMode, ///< [in] tile mode + uint32_t bankSwizzle, ///< [in] bank swizzle + uint32_t tileSplitSlice, ///< [in] If the size of the pixel offset is + ///< larger than the + /// tile split size, then the pixel will be + /// moved to a separate slice. This value + /// equals pixelOffset / tileSplitBytes in + /// this case. Otherwise this is 0. + const GpaTileInfo* pTileInfo, ///< [in] tile info + uint32_t* outCoord ///< [out] resulting coord +) { + const uint32_t pipes = gpaGetPipeCount(pTileInfo->pipeconfig); + uint32_t bankBit0 = 0; + uint32_t bankBit1 = 0; + uint32_t bankBit2 = 0; + uint32_t bankBit3 = 0; + uint32_t sliceRotation; + uint32_t tileSplitRotation; + uint32_t bank; + const uint32_t numBanks = 2 << pTileInfo->banks; + const uint32_t bankWidth = (1 << pTileInfo->bankwidth); + const uint32_t bankHeight = (1 << pTileInfo->bankheight); + + const uint32_t tx = x / MicroTileWidth / (bankWidth * pipes); + const uint32_t ty = y / MicroTileHeight / bankHeight; + + const uint32_t x3 = (tx >> 0) & 1; + const uint32_t x4 = (tx >> 1) & 1; + const uint32_t x5 = (tx >> 2) & 1; + const uint32_t x6 = (tx >> 3) & 1; + const uint32_t y3 = (ty >> 0) & 1; + const uint32_t y4 = (ty >> 1) & 1; + const uint32_t y5 = (ty >> 2) & 1; + const uint32_t y6 = (ty >> 3) & 1; + + switch (numBanks) { + case 16: + bankBit0 = x3 ^ y6; + bankBit1 = x4 ^ y5 ^ y6; + bankBit2 = x5 ^ y4; + 
bankBit3 = x6 ^ y3; + break; + case 8: + bankBit0 = x3 ^ y5; + bankBit1 = x4 ^ y4 ^ y5; + bankBit2 = x5 ^ y3; + break; + case 4: + bankBit0 = x3 ^ y4; + bankBit1 = x4 ^ y3; + break; + case 2: + bankBit0 = x3 ^ y3; + break; + default: + return GPA_ERR_UNSUPPORTED; + } + + bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3); + + // Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0); + + // bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo); + // + // Compute bank rotation for the slice. + // + const uint32_t microTileThickness = gpaGetMicroTileThickness(arrayMode); + + switch (arrayMode) { + case GNM_ARRAY_2D_TILED_THIN1: // fall through + case GNM_ARRAY_2D_TILED_THICK: // fall through + case GNM_ARRAY_2D_TILED_XTHICK: + sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness); + break; + case GNM_ARRAY_3D_TILED_THIN1: // fall through + case GNM_ARRAY_3D_TILED_THICK: // fall through + case GNM_ARRAY_3D_TILED_XTHICK: + sliceRotation = std::max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes; + break; + default: + sliceRotation = 0; + break; + } + + // + // Compute bank rotation for the tile split slice. + // + // The sample slice will be non-zero if samples must be split + // across multiple slices. This situation arises when the micro + // tile size multiplied by the number of samples exceeds the + // split size (set in GB_ADDR_CONFIG). + // + switch (arrayMode) { + case GNM_ARRAY_2D_TILED_THIN1: // fall through + case GNM_ARRAY_3D_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_2D_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_3D_TILED_THIN1: // fall through + tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice; + break; + default: + tileSplitRotation = 0; + break; + } + + // + // Apply bank rotation for the slice and tile split slice.
+ // + bank ^= bankSwizzle + sliceRotation; + bank ^= tileSplitRotation; + + bank &= (numBanks - 1); + + *outCoord = bank; + return GPA_ERR_OK; +} + +static GpaError ComputeSurfaceAddrFromCoordMacroTiled( + uint32_t x, ///< [in] x coordinate + uint32_t y, ///< [in] y coordinate + uint32_t slice, ///< [in] slice index + uint32_t sample, ///< [in] sample index + uint32_t bpp, ///< [in] bits per pixel + uint32_t pitch, ///< [in] surface pitch, in pixels + uint32_t height, ///< [in] surface height, in pixels + uint32_t numSamples, ///< [in] number of samples + GnmArrayMode arrayMode, ///< [in] tile mode + GnmMicroTileMode microTileType, ///< [in] micro tiling type + bool isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used + uint32_t pipeSwizzle, ///< [in] pipe swizzle + uint32_t bankSwizzle, ///< [in] bank swizzle + const GpaTileInfo* pTileInfo, ///< [in] bank structure **All fields to be + ///< valid on entry** + uint64_t* pBytePosition, ///< [out] byte position + uint32_t* pBitPosition ///< [out] bit position, e.g.
FMT_1 will use this +) { + uint64_t addr; + + uint32_t microTileBytes; + uint32_t microTileBits; + uint32_t sampleOffset; + uint32_t pixelIndex; + uint32_t pixelOffset; + uint32_t elementOffset; + uint32_t tileSplitSlice; + uint32_t pipe; + uint32_t bank; + uint64_t sliceBytes; + uint64_t sliceOffset; + uint32_t macroTilePitch; + uint32_t macroTileHeight; + uint32_t macroTilesPerRow; + uint32_t macroTilesPerSlice; + uint64_t macroTileBytes; + uint32_t macroTileIndexX; + uint32_t macroTileIndexY; + uint64_t macroTileOffset; + uint64_t totalOffset; + uint64_t pipeInterleaveMask; + uint64_t bankInterleaveMask; + uint64_t pipeInterleaveOffset; + uint32_t bankInterleaveOffset; + uint64_t offset; + uint32_t tileRowIndex; + uint32_t tileColumnIndex; + uint32_t tileIndex; + uint32_t tileOffset; + + uint32_t microTileThickness = gpaGetMicroTileThickness(arrayMode); + + const uint32_t banks = 2 << pTileInfo->banks; + const uint32_t bankWidth = (1 << pTileInfo->bankwidth); + const uint32_t bankHeight = (1 << pTileInfo->bankheight); + const uint32_t macroAspectRatio = (1 << pTileInfo->macroaspectratio); + const uint32_t tileSplitBytes = + GetTileSplitBytes(pTileInfo->tilesplit, bpp, microTileThickness); + + // + // Compute the number of group, pipe, and bank bits. + // + uint32_t numPipes = gpaGetPipeCount(pTileInfo->pipeconfig); + uint32_t numPipeInterleaveBits = log2(PIPE_INTERLEAVE_BYTES); + uint32_t numPipeBits = log2(numPipes); + uint32_t numBankInterleaveBits = log2(BANK_INTERLEAVE); + uint32_t numBankBits = log2(banks); + + // + // Compute the micro tile size. + // + microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; + + microTileBytes = microTileBits / 8; + // + // Compute the pixel index within the micro tile. + // + GpaError err = + ComputePixelIndexWithinMicroTile(&pixelIndex, x, y, slice, bpp, arrayMode, microTileType); + if (err != GPA_ERR_OK) { + return err; + } + + // + // Compute the sample offset and pixel offset. 
+ // + if (isDepthSampleOrder) { + // + // For depth surfaces, samples are stored contiguously + // for each element, so the sample offset is the sample + // number times the element size. + // + sampleOffset = sample * bpp; + pixelOffset = pixelIndex * bpp * numSamples; + } else { + // + // For color surfaces, all elements for a particular + // sample are stored contiguously, so the sample offset + // is the sample number times the micro tile size + // divided by the number of samples. + // + sampleOffset = sample * (microTileBits / numSamples); + pixelOffset = pixelIndex * bpp; + } + + // + // Compute the element offset. + // + elementOffset = pixelOffset + sampleOffset; + + *pBitPosition = (uint32_t)(elementOffset % 8); + + elementOffset /= 8; // bit-to-byte + + // + // Determine if tiles need to be split across slices. + // + // If the size of the micro tile is larger than the tile split + // size, then the tile will be split across multiple slices. + // + uint32_t slicesPerTile = 1; + + if ((microTileBytes > tileSplitBytes) && + (microTileThickness == 1)) { // don't support for thick mode + + // + // Compute the number of slices per tile. + // + slicesPerTile = microTileBytes / tileSplitBytes; + + // + // Compute the tile split slice number for use in + // rotating the bank. + // + tileSplitSlice = elementOffset / tileSplitBytes; + + // + // Adjust the element offset to account for the portion + // of the tile that is being moved to a new slice. + // + elementOffset %= tileSplitBytes; + + // + // Adjust the microTileBytes size to tileSplitBytes size + // since the tile is split across slices. + // + microTileBytes = tileSplitBytes; + } else { + tileSplitSlice = 0; + } + + // + // Compute macro tile pitch and height. + // + macroTilePitch = (MicroTileWidth * bankWidth * numPipes) * macroAspectRatio; + macroTileHeight = (MicroTileHeight * bankHeight * banks) / macroAspectRatio; + + // + // Compute the number of bytes per macro tile. 
Note: this is actually the + // byte count within a single bank/pipe + // + macroTileBytes = (uint64_t)microTileBytes * (macroTilePitch / MicroTileWidth) * + (macroTileHeight / MicroTileHeight) / (numPipes * banks); + + // + // Compute the number of macro tiles per row. + // + macroTilesPerRow = pitch / macroTilePitch; + + // + // Compute the offset to the macro tile containing the specified + // coordinate. + // + macroTileIndexX = x / macroTilePitch; + macroTileIndexY = y / macroTileHeight; + macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; + + // + // Compute the number of macro tiles per slice. + // + macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight); + + // + // Compute the slice size. + // + sliceBytes = macroTilesPerSlice * macroTileBytes; + + // + // Compute the slice offset. + // + sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness)); + + // + // Compute tile offset + // + tileRowIndex = (y / MicroTileHeight) % bankHeight; + tileColumnIndex = ((x / MicroTileWidth) / numPipes) % bankWidth; + tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex; + tileOffset = tileIndex * microTileBytes; + + // + // Combine the slice offset and macro tile offset with the pixel + // and sample offsets, accounting for the pipe and bank bits in + // the middle of the address. + // + totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset; + + // + // Get the pipe and bank. 
+ // + + // when the tileMode is PRT type, then adjust x and y + // coordinates + if (gpaIsPrt(arrayMode)) { + x = x % macroTilePitch; + y = y % macroTileHeight; + } + + err = ComputePipeFromCoord(x, y, slice, arrayMode, pipeSwizzle, pTileInfo, &pipe); + if (err != GPA_ERR_OK) { + return err; + } + + err = + ComputeBankFromCoord(x, y, slice, arrayMode, bankSwizzle, tileSplitSlice, pTileInfo, &bank); + if (err != GPA_ERR_OK) { + return err; + } + + // + // Split the offset to put some bits below the pipe+bank bits + // and some above. + // + pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1; + bankInterleaveMask = (1 << numBankInterleaveBits) - 1; + pipeInterleaveOffset = totalOffset & pipeInterleaveMask; + bankInterleaveOffset = (uint32_t)((totalOffset >> numPipeInterleaveBits) & bankInterleaveMask); + offset = totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits); + + // + // Assemble the address from its components. + // + addr = pipeInterleaveOffset; + // This is to remove /analyze warnings + uint32_t pipeBits = pipe << numPipeInterleaveBits; + uint32_t bankInterleaveBits = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits); + uint32_t bankBits = bank << (numPipeInterleaveBits + numPipeBits + numBankInterleaveBits); + uint64_t offsetBits = + offset << (numPipeInterleaveBits + numPipeBits + numBankInterleaveBits + numBankBits); + + addr |= pipeBits; + addr |= bankInterleaveBits; + addr |= bankBits; + addr |= offsetBits; + + *pBytePosition = addr; + return GPA_ERR_OK; +} + +static GpaError gpaComputeSurfaceOffset(uint64_t* outoffset, uint64_t* outbitoffset, + const GpaTilerContext* ctx, uint32_t x, uint32_t y, + uint32_t z, uint32_t fragindex) { + if (x > ctx->paddedwidth || y > ctx->paddedheight || z > ctx->paddeddepth || + fragindex > ctx->numfragsperpixel) { + return GPA_ERR_INVALID_ARGS; + } + + const GnmArrayMode arraymode = gpaGetArrayMode(ctx->tilemode); + const GnmMicroTileMode microTileType = gpaGetMicroTileMode(ctx->tilemode); + 
// ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order + const bool isDepthSampleOrder = microTileType == GNM_SURF_DEPTH_MICRO_TILING; + + if (ctx->mingpumode == GNM_GPU_NEO) { + /// @note + /// 128 bit/thick tiled surface doesn't support display + /// tiling and mipmap chain must have the same tileType, + /// so please fill tileType correctly + if (!gpaIsLinear(arraymode)) { + if (ctx->bitsperelement >= 128 || gpaGetMicroTileThickness(arraymode) > 1) { + if (microTileType == GNM_SURF_DISPLAY_MICRO_TILING) { + return GPA_ERR_INTERNAL_ERROR; + } + } + } + } + + GpaError err = GPA_ERR_OK; + uint32_t bitPosition = 0; + uint64_t addr = 0; + + switch (arraymode) { + case GNM_ARRAY_LINEAR_GENERAL: // fall through + case GNM_ARRAY_LINEAR_ALIGNED: + addr = gpaComputeSurfaceAddrFromCoordLinear(x, y, z, fragindex, ctx->bitsperelement, + ctx->paddedwidth, ctx->paddedheight, + ctx->paddeddepth, &bitPosition); + break; + case GNM_ARRAY_1D_TILED_THIN1: // fall through + case GNM_ARRAY_1D_TILED_THICK: + err = ComputeSurfaceAddrFromCoordMicroTiled( + x, y, z, fragindex, ctx->bitsperelement, ctx->paddedwidth, ctx->paddedheight, + ctx->paddeddepth, arraymode, microTileType, isDepthSampleOrder, &addr, &bitPosition); + break; + case GNM_ARRAY_2D_TILED_THIN1: // fall through + case GNM_ARRAY_2D_TILED_THICK: // fall through + case GNM_ARRAY_3D_TILED_THIN1: // fall through + case GNM_ARRAY_3D_TILED_THICK: // fall through + case GNM_ARRAY_2D_TILED_XTHICK: // fall through + case GNM_ARRAY_3D_TILED_XTHICK: // fall through + case GNM_ARRAY_PRT_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_2D_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_3D_TILED_THIN1: // fall through + case GNM_ARRAY_PRT_TILED_THICK: // fall through + case GNM_ARRAY_PRT_2D_TILED_THICK: // fall through + case GNM_ARRAY_PRT_3D_TILED_THICK: + err = ComputeSurfaceAddrFromCoordMacroTiled( + x, y, z, fragindex, ctx->bitsperelement, ctx->paddedwidth, ctx->paddedheight, + ctx->paddeddepth, arraymode, microTileType, 
isDepthSampleOrder, ctx->pipeswizzlemask, + ctx->bankswizzlemask, &ctx->tileinfo, &addr, &bitPosition); + break; + default: + err = GPA_ERR_INTERNAL_ERROR; + break; + } + + if (outoffset) { + *outoffset = addr; + } + if (outbitoffset) { + *outbitoffset = bitPosition; + } + return err; +} + +GpaError gpaTpInit(GpaTilingParams* tp, const GpaTextureInfo* tex, uint32_t miplevel, + uint32_t arrayslice) { + if (!tp || !tex) { + return GPA_ERR_INVALID_ARGS; + } + if (miplevel > tex->nummips) { + return GPA_ERR_INVALID_ARGS; + } + + const bool iscubemap = tex->type == GNM_TEXTURE_CUBEMAP; + const bool isvolume = tex->type == GNM_TEXTURE_3D; + const GnmDataFormat fmt = tex->fmt; + const GnmMicroTileMode mtm = gpaGetMicroTileMode(tex->tm); + const uint32_t totalbitsperelem = gnmDfGetTotalBitsPerElement(fmt); + const uint32_t texelsperelem = gnmDfGetTexelsPerElement(fmt); + + uint32_t numarrayslices = tex->numslices; + if (tex->type == GNM_TEXTURE_CUBEMAP) { + numarrayslices *= 6; + } else if (tex->type == GNM_TEXTURE_3D) { + numarrayslices = 1; + } + if (tex->pow2pad) { + numarrayslices = NextPow2(numarrayslices); + } + + if (arrayslice >= numarrayslices) { + return GPA_ERR_INVALID_ARGS; + } + + tp->tilemode = tex->tm; + tp->mingpumode = tex->mingpumode; + + tp->linearwidth = std::max(tex->width >> miplevel, 1U); + tp->linearheight = std::max(tex->height >> miplevel, 1U); + tp->lineardepth = std::max(tex->depth >> miplevel, 1U); + tp->numfragsperpixel = tex->numfrags; + tp->basetiledpitch = tex->pitch; + + tp->miplevel = miplevel; + tp->arrayslice = arrayslice; + + if (!isvolume && mtm == GNM_SURF_DEPTH_MICRO_TILING) { + if (gnmDfGetZFormat(fmt) != GNM_Z_INVALID) { + tp->surfaceflags.depthtarget = 1; + } + if (gnmDfGetStencilFormat(fmt) != GNM_STENCIL_INVALID) { + tp->surfaceflags.stenciltarget = 1; + } + } + tp->surfaceflags.cube = iscubemap; + tp->surfaceflags.volume = isvolume; + tp->surfaceflags.pow2pad = tex->pow2pad; + if (tex->mingpumode == GNM_GPU_NEO) { + 
tp->surfaceflags.texcompatible = 1; + } + + tp->bitsperfrag = totalbitsperelem / texelsperelem; + tp->isblockcompressed = texelsperelem > 1; + + GpaSurfaceInfo surfinfo = {0}; + GpaError err = gpaComputeSurfaceInfo(&surfinfo, tp); + if (err != GPA_ERR_OK) { + return err; + } + err = gpaAdjustTileMode(&tp->tilemode, tp->tilemode, surfinfo.tileinfo.arraymode); + if (err != GPA_ERR_OK) { + return err; + } + + return GPA_ERR_OK; +} + +static GpaError initregioninfo(GpaSurfaceRegion* region, uint32_t* elemwidth, uint32_t* elemheight, + const GpaTilingParams* tp) { // reads region->left/top, so the caller must initialise them + region->right = tp->linearwidth; + region->bottom = tp->linearheight; + region->back = tp->lineardepth; + *elemwidth = tp->linearwidth; + *elemheight = tp->linearheight; + + if (tp->isblockcompressed) { + switch (tp->bitsperfrag) { + case 1: // X extent is divided by 8 (rounded up); Y is left in texels + region->left = (region->left + 7) / 8; + region->right = (region->right + 7) / 8; + *elemwidth = (*elemwidth + 7) / 8; + break; + case 4: + case 8: // both X and Y extents are divided by 4 (rounded up) + region->left = (region->left + 3) / 4; + region->top = (region->top + 3) / 4; + region->right = (region->right + 3) / 4; + region->bottom = (region->bottom + 3) / 4; + *elemwidth = (*elemwidth + 3) / 4; + *elemheight = (*elemheight + 3) / 4; + break; + case 16: + return GPA_ERR_UNSUPPORTED; + default: + return GPA_ERR_INVALID_ARGS; + } + } + + return GPA_ERR_OK; +} + +GpaError gpaTileSurface(void* outtile, size_t outtilesize, const void* inuntile, + size_t inuntilesize, const GpaTilingParams* tp) { + if (!outtile || !outtilesize || !inuntile || !inuntilesize || !tp) { + return GPA_ERR_INVALID_ARGS; + } + + GpaSurfaceRegion region = {0}; + uint32_t elemwidth = 0; + uint32_t elemheight = 0; + const GpaError err = initregioninfo(&region, &elemwidth, &elemheight, tp); // do not tile with a region the helper rejected + if (err != GPA_ERR_OK) { return err; } + return gpaTileSurfaceRegion(outtile, outtilesize, inuntile, inuntilesize, tp, &region, + elemwidth, elemwidth * elemheight); +} + +static inline bool regionhastexels(const GpaSurfaceRegion* region) { + const uint32_t width = region->right - region->left; + const uint32_t height = 
region->bottom - region->top; + const uint32_t depth = region->back - region->front; // Z extent runs front..back, not top..back + return width > 0 && height > 0 && depth > 0; +} + +GpaError gpaTileSurfaceRegion(void* outtile, size_t outtilesize, const void* inuntile, + size_t inuntilesize, const GpaTilingParams* tp, + const GpaSurfaceRegion* region, uint32_t srcpitch, + uint32_t srcslicepitch) { + if (!outtile || !outtilesize || !inuntile || !inuntilesize || !tp || !region) { + return GPA_ERR_INVALID_ARGS; + } + + if (!regionhastexels(region)) { + // nothing to convert + return GPA_ERR_OK; + } + + GpaTilerContext ctx = {}; + GpaError err = createtilerctx(&ctx, outtilesize, inuntilesize, tp); + if (err != GPA_ERR_OK) { + return err; + } + const uint32_t elembytesize = ctx.bitsperelement / 8; // bytes copied per element + const uint32_t lz = region->back; + const uint32_t ly = region->bottom; + const uint32_t lx = region->right; + const uint32_t lf = ctx.numfragsperpixel; + for (uint32_t z = region->front; z < lz; z += 1) { + for (uint32_t y = region->top; y < ly; y += 1) { + for (uint32_t x = region->left; x < lx; x += 1) { + for (uint32_t f = 0; f < lf; f += 1) { + const uint32_t linearoffset = ComputeSurfaceAddrFromCoordLinear( + x, y, z, f, elembytesize * 8, srcpitch, srcslicepitch, 1, NULL); + uint64_t tiledoffset = 0; + GpaError gerr = gpaComputeSurfaceOffset(&tiledoffset, NULL, &ctx, x, y, z, f); + if (gerr != GPA_ERR_OK) { + return gerr; + } + + memcpy((uint8_t*)outtile + tiledoffset, (const uint8_t*)inuntile + linearoffset, + elembytesize); + } + } + } + } + + return GPA_ERR_OK; +} + +GpaError gpaDetileSurface(void* outuntile, size_t outuntilesize, const void* intile, + size_t intilesize, const GpaTilingParams* tp) { + if (!outuntile || !outuntilesize || !intile || !intilesize || !tp) { + return GPA_ERR_INVALID_ARGS; + } + + GpaSurfaceRegion region = {0}; + uint32_t elemwidth = 0; + uint32_t elemheight = 0; + const GpaError err = initregioninfo(&region, &elemwidth, &elemheight, tp); // do not detile with a region the helper rejected + if (err != GPA_ERR_OK) { return err; } + return gpaDetileSurfaceRegion(outuntile, outuntilesize, intile, 
intilesize, tp, &region, + elemwidth, elemwidth * elemheight); +} + +GpaError gpaDetileSurfaceRegion(void* outuntile, size_t outuntilesize, const void* intile, + size_t intilesize, const GpaTilingParams* tp, + const GpaSurfaceRegion* region, uint32_t dstpitch, + uint32_t dstslicepitch) { + if (!outuntile || !outuntilesize || !intile || !intilesize || !tp || !region) { + return GPA_ERR_INVALID_ARGS; + } + + if (!regionhastexels(region)) { + // nothing to convert + return GPA_ERR_OK; + } + + GpaTilerContext ctx = {}; + GpaError err = createtilerctx(&ctx, outuntilesize, intilesize, tp); + if (err != GPA_ERR_OK) { + return err; + } + + const uint32_t elembytesize = ctx.bitsperelement / 8; // bytes copied per element + const uint32_t lz = region->back; + const uint32_t ly = region->bottom; + const uint32_t lx = region->right; + const uint32_t lf = ctx.numfragsperpixel; + for (uint32_t z = region->front; z < lz; z += 1) { + for (uint32_t y = region->top; y < ly; y += 1) { + for (uint32_t x = region->left; x < lx; x += 1) { + for (uint32_t f = 0; f < lf; f += 1) { + const uint32_t linearoffset = ComputeSurfaceAddrFromCoordLinear( + x, y, z, f, elembytesize * 8, dstpitch, dstslicepitch, 1, NULL); + uint64_t tiledoffset = 0; + GpaError gerr = gpaComputeSurfaceOffset(&tiledoffset, NULL, &ctx, x, y, z, f); + if (gerr != GPA_ERR_OK) { + return gerr; + } + + memcpy((uint8_t*)outuntile + linearoffset, (const uint8_t*)intile + tiledoffset, + elembytesize); + } + } + } + } + + return GPA_ERR_OK; +} + +GpaError gpaTileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo, uint32_t mip, + uint32_t slice) { + if (!inbuffer || !inbuffersize || !outbuffer || !outbuffersize || !texinfo) { + return GPA_ERR_INVALID_ARGS; + } + + GpaTilingParams tp = {}; + GpaError res = gpaTpInit(&tp, texinfo, mip, slice); + if (res != GPA_ERR_OK) { + return res; + } + + uint64_t surfoffset = 0; + uint64_t surfsize = 0; + res = 
gpaCalcSurfaceSizeOffset(&surfsize, &surfoffset, texinfo, mip, slice); + if (res != GPA_ERR_OK) { + return res; + } + if (surfoffset + surfsize > inbuffersize || surfoffset + surfsize > outbuffersize) { + return GPA_ERR_OVERFLOW; + } + + res = gpaTileSurface((uint8_t*)outbuffer + surfoffset, surfsize, + (const uint8_t*)inbuffer + surfoffset, surfsize, &tp); + if (res != GPA_ERR_OK) { + return res; + } + + return GPA_ERR_OK; +} + +GpaError gpaTileTextureAll(const void* inbuffer, size_t inbuffersize, void* outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo) { + if (!inbuffer || !inbuffersize || !outbuffer || !outbuffersize || !texinfo) { + return GPA_ERR_INVALID_ARGS; + } + + for (uint32_t a = 0; a < texinfo->numslices; a += 1) { + for (uint32_t m = 0; m < texinfo->nummips; m += 1) { + GpaError gerr = gpaTileTextureIndexed(inbuffer, inbuffersize, outbuffer, outbuffersize, + texinfo, m, a); + if (gerr != GPA_ERR_OK) { + return gerr; + } + } + } + + return GPA_ERR_OK; +} + +GpaError gpaDetileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo, uint32_t mip, + uint32_t slice) { + if (!inbuffer || !inbuffersize || !outbuffer || !outbuffersize || !texinfo) { + return GPA_ERR_INVALID_ARGS; + } + + GpaTilingParams tp = {}; + GpaError res = gpaTpInit(&tp, texinfo, mip, slice); + if (res != GPA_ERR_OK) { + return res; + } + + uint64_t surfoffset = 0; + uint64_t surfsize = 0; + res = gpaCalcSurfaceSizeOffset(&surfsize, &surfoffset, texinfo, mip, slice); + if (res != GPA_ERR_OK) { + return res; + } + if (surfoffset + surfsize > inbuffersize || surfoffset + surfsize > outbuffersize) { + return GPA_ERR_OVERFLOW; + } + + res = gpaDetileSurface((uint8_t*)outbuffer + surfoffset, surfsize, + (const uint8_t*)inbuffer + surfoffset, surfsize, &tp); + if (res != GPA_ERR_OK) { + return res; + } + + return GPA_ERR_OK; +} + +GpaError gpaDetileTextureAll(const void* inbuffer, size_t inbuffersize, void* 
outbuffer, + size_t outbuffersize, const GpaTextureInfo* texinfo) { + if (!inbuffer || !inbuffersize || !outbuffer || !outbuffersize || !texinfo) { + return GPA_ERR_INVALID_ARGS; + } + + for (uint32_t a = 0; a < texinfo->numslices; a += 1) { + for (uint32_t m = 0; m < texinfo->nummips; m += 1) { + GpaError gerr = gpaDetileTextureIndexed(inbuffer, inbuffersize, outbuffer, + outbuffersize, texinfo, m, a); + if (gerr != GPA_ERR_OK) { + return gerr; + } + } + } + + return GPA_ERR_OK; +} diff --git a/src/video_core/amdgpu/gpuaddr/types.h b/src/video_core/amdgpu/gpuaddr/types.h new file mode 100644 index 00000000..3bd0b550 --- /dev/null +++ b/src/video_core/amdgpu/gpuaddr/types.h @@ -0,0 +1,375 @@ +// SPDX-FileCopyrightText: Copyright 2024 freegnm Project +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include "common/types.h" +#include "video_core/amdgpu/gpuaddr/dataformat.h" + +enum GnmSampleSplit { + GNM_ADDR_SAMPLE_SPLIT_1 = 0x0, + GNM_ADDR_SAMPLE_SPLIT_2 = 0x1, + GNM_ADDR_SAMPLE_SPLIT_4 = 0x2, + GNM_ADDR_SAMPLE_SPLIT_8 = 0x3, +}; + +enum GnmMicroTileMode { + GNM_SURF_DISPLAY_MICRO_TILING = 0x0, + GNM_SURF_THIN_MICRO_TILING = 0x1, + GNM_SURF_DEPTH_MICRO_TILING = 0x2, + GNM_SURF_ROTATED_MICRO_TILING = 0x3, + GNM_SURF_THICK_MICRO_TILING = 0x4, +}; + +enum GnmMacroTileMode { + GNM_MACROTILEMODE_1x4_16 = 0x0, + GNM_MACROTILEMODE_1x2_16 = 0x1, + GNM_MACROTILEMODE_1x1_16 = 0x2, + GNM_MACROTILEMODE_1x1_16_DUP = 0x3, + GNM_MACROTILEMODE_1x1_8 = 0x4, + GNM_MACROTILEMODE_1x1_4 = 0x5, + GNM_MACROTILEMODE_1x1_2 = 0x6, + GNM_MACROTILEMODE_1x1_2_DUP = 0x7, + GNM_MACROTILEMODE_1x8_16 = 0x8, + GNM_MACROTILEMODE_1x4_16_DUP = 0x9, + GNM_MACROTILEMODE_1x2_16_DUP = 0xa, + GNM_MACROTILEMODE_1x1_16_DUP2 = 0xb, + GNM_MACROTILEMODE_1x1_8_DUP = 0xc, + GNM_MACROTILEMODE_1x1_4_DUP = 0xd, + GNM_MACROTILEMODE_1x1_2_DUP2 = 0xe, + GNM_MACROTILEMODE_1x1_2_DUP3 = 0xf, +}; + +enum GnmTileMode { + GNM_TM_DEPTH_2D_THIN_64 = 0x0, + GNM_TM_DEPTH_2D_THIN_128 = 0x1, + 
GNM_TM_DEPTH_2D_THIN_256 = 0x2, + GNM_TM_DEPTH_2D_THIN_512 = 0x3, + GNM_TM_DEPTH_2D_THIN_1K = 0x4, + GNM_TM_DEPTH_1D_THIN = 0x5, + GNM_TM_DEPTH_2D_THIN_PRT_256 = 0x6, + GNM_TM_DEPTH_2D_THIN_PRT_1K = 0x7, + + GNM_TM_DISPLAY_LINEAR_ALIGNED = 0x8, + GNM_TM_DISPLAY_1D_THIN = 0x9, + GNM_TM_DISPLAY_2D_THIN = 0xa, + GNM_TM_DISPLAY_THIN_PRT = 0xb, + GNM_TM_DISPLAY_2D_THIN_PRT = 0xc, + + GNM_TM_THIN_1D_THIN = 0xd, + GNM_TM_THIN_2D_THIN = 0xe, + GNM_TM_THIN_3D_THIN = 0xf, + GNM_TM_THIN_THIN_PRT = 0x10, + GNM_TM_THIN_2D_THIN_PRT = 0x11, + GNM_TM_THIN_3D_THIN_PRT = 0x12, + + GNM_TM_THICK_1D_THICK = 0x13, + GNM_TM_THICK_2D_THICK = 0x14, + GNM_TM_THICK_3D_THICK = 0x15, + GNM_TM_THICK_THICK_PRT = 0x16, + GNM_TM_THICK_2D_THICK_PRT = 0x17, + GNM_TM_THICK_3D_THICK_PRT = 0x18, + GNM_TM_THICK_2D_XTHICK = 0x19, + GNM_TM_THICK_3D_XTHICK = 0x1a, + + GNM_TM_DISPLAY_LINEAR_GENERAL = 0x1f, +}; + +enum GnmArrayMode { + GNM_ARRAY_LINEAR_GENERAL = 0x0, + GNM_ARRAY_LINEAR_ALIGNED = 0x1, + GNM_ARRAY_1D_TILED_THIN1 = 0x2, + GNM_ARRAY_1D_TILED_THICK = 0x3, + GNM_ARRAY_2D_TILED_THIN1 = 0x4, + GNM_ARRAY_PRT_TILED_THIN1 = 0x5, + GNM_ARRAY_PRT_2D_TILED_THIN1 = 0x6, + GNM_ARRAY_2D_TILED_THICK = 0x7, + GNM_ARRAY_2D_TILED_XTHICK = 0x8, + GNM_ARRAY_PRT_TILED_THICK = 0x9, + GNM_ARRAY_PRT_2D_TILED_THICK = 0xa, + GNM_ARRAY_PRT_3D_TILED_THIN1 = 0xb, + GNM_ARRAY_3D_TILED_THIN1 = 0xc, + GNM_ARRAY_3D_TILED_THICK = 0xd, + GNM_ARRAY_3D_TILED_XTHICK = 0xe, + GNM_ARRAY_PRT_3D_TILED_THICK = 0xf, +}; + +enum GnmNumBanks { + GNM_SURF_2_BANK = 0x0, + GNM_SURF_4_BANK = 0x1, + GNM_SURF_8_BANK = 0x2, + GNM_SURF_16_BANK = 0x3, +}; + +enum GnmGpuMode { + GNM_GPU_BASE = 0x0, + GNM_GPU_NEO = 0x1, +}; + +enum GnmBankWidth { + GNM_SURF_BANK_WIDTH_1 = 0x0, + GNM_SURF_BANK_WIDTH_2 = 0x1, + GNM_SURF_BANK_WIDTH_4 = 0x2, + GNM_SURF_BANK_WIDTH_8 = 0x3, +}; + +enum GnmBankHeight { + GNM_SURF_BANK_HEIGHT_1 = 0x0, + GNM_SURF_BANK_HEIGHT_2 = 0x1, + GNM_SURF_BANK_HEIGHT_4 = 0x2, + GNM_SURF_BANK_HEIGHT_8 = 0x3, +}; + +enum GnmPipeConfig { + 
GNM_ADDR_SURF_P2 = 0x0, + GNM_ADDR_SURF_P4_8x16 = 0x4, + GNM_ADDR_SURF_P4_16x16 = 0x5, + GNM_ADDR_SURF_P4_16x32 = 0x6, + GNM_ADDR_SURF_P4_32x32 = 0x7, + GNM_ADDR_SURF_P8_16x16_8x16 = 0x8, + GNM_ADDR_SURF_P8_16x32_8x16 = 0x9, + GNM_ADDR_SURF_P8_32x32_8x16 = 0xa, + GNM_ADDR_SURF_P8_16x32_16x16 = 0xb, + GNM_ADDR_SURF_P8_32x32_16x16 = 0xc, + GNM_ADDR_SURF_P8_32x32_16x32 = 0xd, + GNM_ADDR_SURF_P8_32x64_32x32 = 0xe, + GNM_ADDR_SURF_P16_32x32_8x16 = 0x10, + GNM_ADDR_SURF_P16_32x32_16x16 = 0x11, +}; + +enum GnmMacroTileAspect { + GNM_SURF_MACRO_ASPECT_1 = 0x0, + GNM_SURF_MACRO_ASPECT_2 = 0x1, + GNM_SURF_MACRO_ASPECT_4 = 0x2, + GNM_SURF_MACRO_ASPECT_8 = 0x3, +}; + +enum GnmTileSplit { + GNM_SURF_TILE_SPLIT_64B = 0x0, + GNM_SURF_TILE_SPLIT_128B = 0x1, + GNM_SURF_TILE_SPLIT_256B = 0x2, + GNM_SURF_TILE_SPLIT_512B = 0x3, + GNM_SURF_TILE_SPLIT_1KB = 0x4, + GNM_SURF_TILE_SPLIT_2KB = 0x5, + GNM_SURF_TILE_SPLIT_4KB = 0x6, +}; + +enum GpaSurfaceType { + GPA_SURFACE_COLORDISPLAY, + GPA_SURFACE_COLOR, + GPA_SURFACE_DEPTHSTENCIL, + GPA_SURFACE_DEPTH, + GPA_SURFACE_STENCIL, + GPA_SURFACE_FMASK, + GPA_SURFACE_TEXTUREFLAT, + GPA_SURFACE_TEXTUREVOLUME, + GPA_SURFACE_TEXTURECUBEMAP, + GPA_SURFACE_RWTEXTUREFLAT, + GPA_SURFACE_RWTEXTUREVOLUME, + GPA_SURFACE_RWTEXTURECUBEMAP, +}; + +struct GpaSurfaceFlags { + u32 colortarget : 1; + u32 depthtarget : 1; + u32 stenciltarget : 1; + u32 texture : 1; + u32 cube : 1; + u32 volume : 1; + u32 fmask : 1; + u32 cubeasarray : 1; + u32 overlay : 1; + u32 display : 1; + u32 prt : 1; + u32 pow2pad : 1; + u32 texcompatible : 1; + u32 _unused : 19; +}; +static_assert(sizeof(GpaSurfaceFlags) == 0x4, ""); + +struct GpaSurfaceProperties { + GnmTileMode tilemode; + GpaSurfaceFlags flags; +}; + +struct GpaHtileParams { + u32 pitch; + u32 height; + u32 numslices; + u32 numfrags; + u32 bpp; + + GnmArrayMode arraymode; + GnmNumBanks banks; + GnmPipeConfig pipeconfig; + + GnmGpuMode mingpumode; + + struct { + u32 tccompatible : 1; + u32 reserved : 31; + } flags; +}; + 
+struct GpaCmaskParams { + u32 pitch; + u32 height; + u32 numslices; + u32 numfrags; + u32 bpp; + GnmTileMode tilemode; + + GnmGpuMode mingpumode; + + struct { + u32 tccompatible : 1; + u32 reserved : 31; + } flags; +}; + +struct GpaFmaskParams { + u32 pitch; + u32 height; + u32 numslices; + u32 numfrags; + u32 bpp; + GnmTileMode tilemode; + + GnmGpuMode mingpumode; + + bool isblockcompressed; +}; + +struct GpaTileInfo { + GnmArrayMode arraymode; + GnmNumBanks banks; + GnmBankWidth bankwidth; + GnmBankHeight bankheight; + GnmMacroTileAspect macroaspectratio; + GnmTileSplit tilesplit; + GnmPipeConfig pipeconfig; +}; + +struct GpaSurfaceInfo { + u32 pitch; + u32 height; + u32 depth; + uint64_t surfacesize; + u32 basealign; + u32 pitchalign; + u32 heightalign; + u32 depthalign; + u32 bitsperelem; + + u32 blockwidth; + u32 blockheight; + + GnmTileMode tilemode; + GpaTileInfo tileinfo; + + struct { + u32 istexcompatible : 1; + u32 _unused : 31; + }; +}; + +struct GpaHtileInfo { + u32 pitch; + u32 height; + u32 basealign; + u32 bpp; + u32 macrowidth; + u32 macroheight; + uint64_t htilebytes; + uint64_t slicebytes; +}; + +struct GpaCmaskInfo { + u32 pitch; + u32 height; + u32 basealign; + u32 bpp; + u32 macrowidth; + u32 macroheight; + u32 blockmax; + uint64_t cmaskbytes; + uint64_t slicebytes; +}; + +struct GpaFmaskInfo { + u32 pitch; + u32 height; + u32 basealign; + u32 pitchalign; + u32 heightalign; + u32 bpp; + uint64_t fmaskbytes; + uint64_t slicebytes; +}; + +struct GpaSurfaceIndex { + u32 arrayindex; + u32 face; + u32 mip; + u32 depth; + u32 fragment; + u32 sample; +}; + +struct GpaTilingParams { + GnmTileMode tilemode; + GnmGpuMode mingpumode; + + u32 linearwidth; + u32 linearheight; + u32 lineardepth; + u32 numfragsperpixel; + u32 basetiledpitch; + + u32 miplevel; + u32 arrayslice; + GpaSurfaceFlags surfaceflags; + u32 bitsperfrag; + bool isblockcompressed; +}; + +struct GpaSurfaceRegion { + u32 left; // -X + u32 top; // -Y + u32 front; // -Z + + u32 right; // +X 
+ u32 bottom; // +Y + u32 back; // +Z +}; + +enum GnmTextureType { + GNM_TEXTURE_1D = 0x8, + GNM_TEXTURE_2D = 0x9, + GNM_TEXTURE_3D = 0xa, + GNM_TEXTURE_CUBEMAP = 0xb, + GNM_TEXTURE_1D_ARRAY = 0xc, + GNM_TEXTURE_2D_ARRAY = 0xd, + GNM_TEXTURE_2D_MSAA = 0xe, + GNM_TEXTURE_2D_ARRAY_MSAA = 0xf, +}; + +struct GpaTextureInfo { + GnmTextureType type; + GnmDataFormat fmt; + + u32 width; + u32 height; + u32 pitch; + u32 depth; + + u32 numfrags; + u32 nummips; + u32 numslices; + + GnmTileMode tm; + GnmGpuMode mingpumode; + + bool pow2pad; +}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index ae7d53e3..0b77ecf2 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -10,11 +10,9 @@ #include "video_core/amdgpu/pixel_format.h" #include -#include #include -#include -#include #include +#include #include #include diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index e9b7a553..67fc82ec 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/types.h" #include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/gpuaddr/gpuaddr.h" namespace AmdGpu { @@ -132,10 +133,21 @@ struct Image { } u32 NumLayers() const { - return last_array - base_array + 1; + u32 slices = type == ImageType::Color3D ? 
1 : depth.Value() + 1; + if (type == ImageType::Cube) { + slices *= 6; + } + if (pow2pad) { + slices = std::bit_ceil(slices); + } + return slices; } u32 NumLevels() const { + if (type == ImageType::Color2DMsaa || + type == ImageType::Color2DMsaaArray) { + return 1; + } return last_level + 1; } @@ -155,9 +167,29 @@ struct Image { return GetTilingMode() != TilingMode::Display_Linear; } - [[nodiscard]] size_t GetSizeAligned() const { - // TODO: Derive this properly from tiling params - return (width + 1) * (height + 1) * NumComponents(GetDataFmt()); + [[nodiscard]] size_t GetSizeAligned(const GpaTextureInfo& texinfo) const { + GpaTilingParams tp = {}; + GpaError err = gpaTpInit(&tp, &texinfo, 0, 0); + ASSERT(err == GPA_ERR_OK); + + GpaSurfaceInfo surfinfo = {}; + size_t size = {}; + for (uint32_t i = 0; i < NumLevels(); i += 1) { + tp.linearwidth = std::max(texinfo.width >> i, 1U); + tp.linearheight = std::max(texinfo.height >> i, 1U); + tp.lineardepth = std::max(texinfo.depth >> i, 1U); + tp.miplevel = i; + + err = gpaComputeSurfaceInfo(&surfinfo, &tp); + ASSERT(err == GPA_ERR_OK); + + size += NumLayers() * surfinfo.surfacesize; + if (tp.linearwidth == 1 && tp.linearheight == 1 && + tp.lineardepth == 1) { + break; + } + } + return size; } }; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index d0dc6872..cb54e05a 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -291,7 +291,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu } if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { - return vk::Format::eB8G8R8A8Srgb; + return vk::Format::eR8G8B8A8Srgb; } if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Float) { @@ -324,7 +324,8 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat 
nu if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc3UnormBlock; } - if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Uint) { + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Uint) { return vk::Format::eR8G8B8A8Uint; } if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Float) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b6764db7..893c6128 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -64,7 +64,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }; if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) { - LOG_WARNING(Render_Vulkan, "Rectangle List primitive type is only supported for embedded VS"); + LOG_WARNING(Render_Vulkan, + "Rectangle List primitive type is only supported for embedded VS"); } const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d811c42c..50a7cfde 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -41,9 +41,11 @@ Instance::Instance(bool enable_validation, bool dump_command_buffers) physical_devices{instance->enumeratePhysicalDevices()} {} Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index) - : instance{CreateInstance(dl, window.getWindowInfo().type, true, false)}, + : enable_validation{false}, instance{CreateInstance(dl, window.getWindowInfo().type, enable_validation, false)}, physical_devices{instance->enumeratePhysicalDevices()} { - debug_callback = CreateDebugCallback(*instance); + if (enable_validation) { + 
debug_callback = CreateDebugCallback(*instance); + } const std::size_t num_physical_devices = static_cast(physical_devices.size()); ASSERT_MSG(num_physical_devices > 0, "No physical devices found"); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 28af5405..4bd91c2d 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -194,6 +194,7 @@ private: private: vk::DynamicLoader dl; + bool enable_validation{}; vk::UniqueInstance instance; vk::PhysicalDevice physical_device; vk::UniqueDevice device; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 95762e66..f98084c8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -112,7 +112,8 @@ void PipelineCache::RefreshGraphicsKey() { key.color_formats[remapped_cb] = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.blend_controls[remapped_cb] = regs.blend_control[cb]; - key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && + !col_buf.info.blend_bypass); key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; ++remapped_cb; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 12cdc0cb..dc3c6875 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -60,7 +60,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { }); } if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) { - const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent); + const auto& image_view = 
+ texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent); depth_attachment = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eGeneral, @@ -91,7 +92,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { - const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList ? 4 : regs.num_indices; + const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList + ? 4 + : regs.num_indices; cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0); } cmdbuf.endRendering(); @@ -175,7 +178,7 @@ void Rasterizer::UpdateViewportScissorState() { .y = regs.viewports[0].yoffset - regs.viewports[0].yscale, .width = regs.viewports[0].xscale * 2.0f, .height = regs.viewports[0].yscale * 2.0f, - .minDepth = /*regs.viewports[0].zoffset - regs.viewports[0].zscale*/0.f, + .minDepth = /*regs.viewports[0].zoffset - regs.viewports[0].zscale*/ 0.f, .maxDepth = regs.viewports[0].zscale + regs.viewports[0].zoffset, }; const vk::Rect2D scissor{ diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 4391076b..23f55eed 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -9,8 +9,8 @@ #include "video_core/texture_cache/image.h" #include "video_core/texture_cache/tile_manager.h" -#include #include +#include namespace VideoCore { @@ -41,7 +41,8 @@ static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) { if (format == vk::Format::eD32SfloatS8Uint || format == vk::Format::eD32Sfloat) { usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; } else { - if (format != vk::Format::eBc3SrgbBlock && format != vk::Format::eBc3UnormBlock && format != vk::Format::eBc1RgbaUnormBlock) { + if (format != vk::Format::eBc3SrgbBlock && format != vk::Format::eBc3UnormBlock && + format != 
vk::Format::eBc1RgbaUnormBlock) { usage |= vk::ImageUsageFlagBits::eColorAttachment; } } @@ -54,10 +55,10 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { return vk::ImageType::e1D; case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color1DArray: + case AmdGpu::ImageType::Color2DArray: case AmdGpu::ImageType::Cube: return vk::ImageType::e2D; case AmdGpu::ImageType::Color3D: - case AmdGpu::ImageType::Color2DArray: return vk::ImageType::e3D; default: UNREACHABLE(); @@ -121,7 +122,28 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { pitch = image.Pitch(); resources.levels = image.NumLevels(); resources.layers = image.NumLayers(); - guest_size_bytes = image.GetSizeAligned(); + texinfo = GpaTextureInfo{ + .type = static_cast(image.type.Value()), + .fmt = { + .surfacefmt = static_cast(image.data_format.Value()), + .chantype = static_cast(image.num_format.Value()), + .chanx = static_cast(image.dst_sel_x.Value()), + .chany = static_cast(image.dst_sel_y.Value()), + .chanz = static_cast(image.dst_sel_z.Value()), + .chanw = static_cast(image.dst_sel_w.Value()), + }, + .width = static_cast(image.width.Value() + 1), + .height = static_cast(image.height.Value() + 1), + .pitch = image.Pitch(), + .depth = 1, + .numfrags = 1, + .nummips = image.NumLevels(), + .numslices = image.NumLayers(), + .tm = static_cast(image.tiling_index.Value()), + .mingpumode = GNM_GPU_BASE, + .pow2pad = bool(image.pow2pad.Value()), + }; + guest_size_bytes = image.GetSizeAligned(texinfo); } UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) @@ -165,21 +187,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, if (info.type == vk::ImageType::e3D) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } - if (info.is_tiled) { - flags |= vk::ImageCreateFlagBits::eExtendedUsage; - if (false) { // IsBlockCodedFormat() - flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible; - } - } - if (info.pixel_format == 
vk::Format::eR16Sscaled) { - info.is_tiled = false; - flags = {}; - } info.usage = ImageUsageFlags(info.pixel_format); - if ((info.is_tiled && (info.pixel_format != vk::Format::eBc3UnormBlock) && info.pixel_format != vk::Format::eBc1RgbaSrgbBlock) || info.is_storage) { - info.usage |= vk::ImageUsageFlagBits::eStorage; - } if (info.pixel_format == vk::Format::eD32Sfloat) { aspect_mask = vk::ImageAspectFlagBits::eDepth; } @@ -221,24 +230,26 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags ds return; } - const vk::ImageMemoryBarrier barrier = {.srcAccessMask = access_mask, - .dstAccessMask = dst_mask, - .oldLayout = layout, - .newLayout = dst_layout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }}; + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = access_mask, + .dstAccessMask = dst_mask, + .oldLayout = layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; // Adjust pipieline stage - vk::PipelineStageFlagBits dst_pl_stage = (dst_mask == vk::AccessFlagBits::eTransferRead || - dst_mask == vk::AccessFlagBits::eTransferWrite) + const vk::PipelineStageFlagBits dst_pl_stage = (dst_mask == vk::AccessFlagBits::eTransferRead || + dst_mask == vk::AccessFlagBits::eTransferWrite) ? 
vk::PipelineStageFlagBits::eTransfer : vk::PipelineStageFlagBits::eAllGraphics; const auto cmdbuf = scheduler->CommandBuffer(); diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 64bcfbd3..a62d3d92 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -8,6 +8,7 @@ #include "core/libraries/videoout/buffer.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" +#include "video_core/amdgpu/gpuaddr/gpuaddr.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/types.h" @@ -52,6 +53,7 @@ struct ImageInfo { u32 pitch = 0; u32 guest_size_bytes = 0; AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear}; + GpaTextureInfo texinfo{}; }; struct UniqueImage { diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 532e4291..96686b87 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -3,8 +3,8 @@ #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/image.h" +#include "video_core/texture_cache/image_view.h" namespace VideoCore { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 43f13932..7c518e9c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -4,6 +4,7 @@ #include #include "common/assert.h" #include "common/config.h" +#include "common/error.h" #include "core/virtual_memory.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -23,7 +24,7 @@ void mprotect(void* addr, size_t len, int prot) { DWORD old_prot{}; BOOL result = 
VirtualProtect(addr, len, prot, &old_prot); - ASSERT_MSG(result != 0, "Region protection failed"); + ASSERT_MSG(result != 0, "Region protection failed {}", Common::GetLastErrorMsg()); } #endif @@ -153,8 +154,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi usage_override = image.info.usage & ~vk::ImageUsageFlagBits::eStorage; } - const ImageViewId view_id = - slot_image_views.insert(instance, view_info, image, usage_override); + const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, usage_override); image.image_view_infos.emplace_back(view_info); image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; @@ -193,81 +193,58 @@ void TextureCache::RefreshImage(Image& image) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; - { - if (!tile_manager.TryDetile(image)) { - // Upload data to the staging buffer. - const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4); - const u8* image_data = reinterpret_cast(image.cpu_addr); - std::memcpy(data, image_data, image.info.guest_size_bytes); - staging.Commit(image.info.guest_size_bytes); - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - // Copy to the image. 
- const vk::BufferImageCopy image_copy = { - .bufferOffset = offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {image.info.size.width, image.info.size.height, 1}, - }; - - cmdbuf.copyBufferToImage(staging.Handle(), image.image, - vk::ImageLayout::eTransferDstOptimal, image_copy); - } - - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); - return; - } - const u8* image_data = reinterpret_cast(image.cpu_addr); - for (u32 m = 0; m < image.info.resources.levels; m++) { - const u32 width = image.info.size.width >> m; - const u32 height = image.info.size.height >> m; - const u32 map_size = width * height * image.info.resources.layers; + const auto [staging_data, offset, _] = staging.Map(image.info.guest_size_bytes, 16); + if (image.info.texinfo.tm == GnmTileMode::GNM_TM_DISPLAY_LINEAR_GENERAL) { + std::memcpy(staging_data, image_data, image.info.guest_size_bytes); + } else { + const GpaError res = gpaDetileTextureAll(image_data, image.info.guest_size_bytes, staging_data, + image.info.guest_size_bytes, &image.info.texinfo); + ASSERT_MSG(res == GPA_ERR_OK, "Texture detiling failed with error: {}", gpaStrError(res)); + } + staging.Commit(image.info.guest_size_bytes); - // Upload data to the staging buffer. - const auto [data, offset, _] = staging.Map(map_size, 16); - if (image.info.is_tiled) { - ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode()); - } else { - std::memcpy(data, image_data, map_size); - } - staging.Commit(map_size); - image_data += map_size; + // The mipmaps of each slice are next to each other in memory. So we iterate each layer + // and detile its mipmaps. Vulkan allows us to copy to the same mipmap of multiple layers at + // once, so we try to upload in that order. 
+ boost::container::small_vector image_copies; + for (u32 mip = 0; mip < image.info.resources.levels; mip++) { + // Initialize tiling parameters. + GpaTilingParams tp = {}; + GpaError res = gpaTpInit(&tp, &image.info.texinfo, mip, 0); + ASSERT(res == GPA_ERR_OK); - // Copy to the image. - const vk::BufferImageCopy image_copy = { - .bufferOffset = offset, + // Figure out the offset of the slice0 mip in the image data and its size. + u64 surfoffset = 0; + u64 surfsize = 0; + res = gpaCalcSurfaceSizeOffset(&surfsize, &surfoffset, &image.info.texinfo, mip, 0); + ASSERT(res == GPA_ERR_OK); + + // Add a new buffer copy for later. + image_copies.push_back({ + .bufferOffset = offset + surfoffset, .bufferRowLength = 0, .bufferImageHeight = 0, .imageSubresource{ .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = m, + .mipLevel = mip, .baseArrayLayer = 0, .layerCount = u32(image.info.resources.layers), }, .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, 1}, - }; - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - cmdbuf.copyBufferToImage(staging.Handle(), image.image, - vk::ImageLayout::eTransferDstOptimal, image_copy); - - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + .imageExtent = {image.info.size.width >> mip, image.info.size.height >> mip, 1}, + }); } + + // Perform copy. 
+ const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + cmdbuf.copyBufferToImage(staging.Handle(), image.image, + vk::ImageLayout::eTransferDstOptimal, image_copies); + + image.Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); } vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0a4ed2b7..20ae5bd5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -37,7 +37,8 @@ public: void OnCpuWrite(VAddr address); /// Retrieves the image handle of the image with the provided attributes and address. - [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create = true); + [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address, + bool refresh_on_create = true); /// Retrieves an image view with the properties of the specified image descriptor. [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image);