amdgpu: Add freegnm detiler

This commit is contained in:
raphaelthegreat 2024-06-07 02:14:52 +03:00
parent 6cec16225d
commit f291674790
33 changed files with 5293 additions and 135 deletions

View File

@ -377,6 +377,18 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/amdgpu/pm4_cmds.h
src/video_core/amdgpu/pm4_opcodes.h
src/video_core/amdgpu/resource.h
src/video_core/amdgpu/gpuaddr/dataformat.cpp
src/video_core/amdgpu/gpuaddr/dataformat.h
src/video_core/amdgpu/gpuaddr/element.cpp
src/video_core/amdgpu/gpuaddr/error.cpp
src/video_core/amdgpu/gpuaddr/error.h
src/video_core/amdgpu/gpuaddr/gpuaddr.h
src/video_core/amdgpu/gpuaddr/gpuaddr_private.h
src/video_core/amdgpu/gpuaddr/surface.cpp
src/video_core/amdgpu/gpuaddr/surfgen.cpp
src/video_core/amdgpu/gpuaddr/tilemodes.cpp
src/video_core/amdgpu/gpuaddr/tiler.cpp
src/video_core/amdgpu/gpuaddr/types.h
src/video_core/renderer_vulkan/liverpool_to_vk.cpp
src/video_core/renderer_vulkan/liverpool_to_vk.h
src/video_core/renderer_vulkan/renderer_vulkan.cpp

View File

@ -1037,7 +1037,6 @@ void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
return linker->TlsGetAddr(index->ti_module, index->ti_offset);
}
int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
if (cond == nullptr) {
return SCE_KERNEL_ERROR_EINVAL;

View File

@ -12,14 +12,17 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
const auto info = inst->Flags<IR::TextureInstInfo>();
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
}
// Emits an explicit-LOD sample of the image/sampler pair encoded in `handle`:
// the low 16 bits index ctx.images, the high 16 bits index ctx.samplers.
// The stale `return EmitImageSampleImplicitLod(...)` that preceded this code
// made the explicit-LOD path unreachable and has been dropped.
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
                              Id offset) {
    const auto& texture = ctx.images[handle & 0xFFFF];
    const Id image = ctx.OpLoad(texture.image_type, texture.id);
    const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
    const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
    // NOTE(review): the LOD operand is hard-coded to 0 and bias_lc/offset are
    // ignored — confirm this is the intended interim behaviour.
    return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords,
                                        spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,

View File

@ -22,9 +22,15 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::U32 dword_offset =
smrd.imm ? ir.Imm32(smrd.offset) : (smrd.offset == SQ_SRC_LITERAL ? ir.Imm32(inst.src[1].code)
: ir.GetScalarReg(IR::ScalarReg(smrd.offset)));
const IR::U32 dword_offset = [&] -> IR::U32 {
if (smrd.imm) {
return ir.Imm32(smrd.offset);
}
if (smrd.offset == SQ_SRC_LITERAL) {
return ir.Imm32(inst.src[1].code);
}
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));

View File

@ -25,8 +25,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
}
// Translates V_MUL_F32: multiplies the two source operands and stores the
// product through SetDst. The superseded direct ir.SetVectorReg write (and its
// dst_reg temporary) is removed so the product is emitted only once.
void Translator::V_MUL_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
}
void Translator::V_CNDMASK_B32(const GcnInst& inst) {

View File

@ -0,0 +1,387 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "common/assert.h"
#include "video_core/amdgpu/gpuaddr/dataformat.h"
// Builds the data format for an FMASK surface with the given sample and
// fragment counts. Known-but-unsupported combinations produce
// GNM_IMG_DATA_FORMAT_INVALID, an unknown fragment count aborts, and an
// unknown sample count reaches UNREACHABLE().
GnmDataFormat gnmDfInitFromFmask(uint32_t numsamples, uint32_t numfrags) {
    // One table per sample count; columns correspond to 1, 2, 4 and 8 fragments.
    static constexpr GnmImageFormat kTwoSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK8_S2_F1,
        GNM_IMG_DATA_FORMAT_FMASK8_S2_F2,
        GNM_IMG_DATA_FORMAT_INVALID,
        GNM_IMG_DATA_FORMAT_INVALID,
    };
    static constexpr GnmImageFormat kFourSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK8_S4_F1,
        GNM_IMG_DATA_FORMAT_FMASK8_S4_F2,
        GNM_IMG_DATA_FORMAT_FMASK8_S4_F4,
        GNM_IMG_DATA_FORMAT_INVALID,
    };
    static constexpr GnmImageFormat kEightSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK8_S8_F1,
        GNM_IMG_DATA_FORMAT_FMASK16_S8_F2,
        GNM_IMG_DATA_FORMAT_FMASK32_S8_F4,
        GNM_IMG_DATA_FORMAT_FMASK32_S8_F8,
    };
    static constexpr GnmImageFormat kSixteenSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK16_S16_F1,
        GNM_IMG_DATA_FORMAT_FMASK32_S16_F2,
        GNM_IMG_DATA_FORMAT_FMASK64_S16_F4,
        GNM_IMG_DATA_FORMAT_FMASK64_S16_F8,
    };

    // Converts numfrags into a table column; any value other than 1/2/4/8 is fatal.
    const auto column = [numfrags]() -> uint32_t {
        switch (numfrags) {
        case 1:
            return 0;
        case 2:
            return 1;
        case 4:
            return 2;
        case 8:
            return 3;
        default:
            abort();
        }
    };

    GnmDataFormat res = {
        .surfacefmt = GNM_IMG_DATA_FORMAT_INVALID,
        .chantype = GNM_IMG_NUM_FORMAT_UNORM,
        .chanx = GNM_CHAN_X,
        .chany = GNM_CHAN_X,
        .chanz = GNM_CHAN_CONSTANT0,
        .chanw = GNM_CHAN_CONSTANT1,
    };

    switch (numsamples) {
    case 1:
        // No FMASK format exists for one sample; numfrags is not inspected.
        break;
    case 2:
        res.surfacefmt = kTwoSamples[column()];
        break;
    case 4:
        res.surfacefmt = kFourSamples[column()];
        break;
    case 8:
        res.surfacefmt = kEightSamples[column()];
        break;
    case 16:
        res.surfacefmt = kSixteenSamples[column()];
        break;
    default:
        UNREACHABLE();
    }

    // Sixteen-sample masks use a second channel.
    if (numsamples == 16) {
        res.chany = GNM_CHAN_Y;
        res.chanz = GNM_CHAN_CONSTANT1;
    }
    return res;
}
// Builds the single-channel data format matching a depth format.
// GNM_Z_16 maps to 16-bit UNORM and GNM_Z_32_FLOAT to 32-bit FLOAT; every
// other value (including GNM_Z_INVALID) yields an INVALID/UNORM format.
GnmDataFormat gnmDfInitFromZ(GnmZFormat zfmt) {
    GnmDataFormat res = {
        .surfacefmt = GNM_IMG_DATA_FORMAT_INVALID,
        .chantype = GNM_IMG_NUM_FORMAT_UNORM,
        .chanx = GNM_CHAN_X,
        .chany = GNM_CHAN_CONSTANT0,
        .chanz = GNM_CHAN_CONSTANT0,
        .chanw = GNM_CHAN_CONSTANT1,
    };
    if (zfmt == GNM_Z_16) {
        res.surfacefmt = GNM_IMG_DATA_FORMAT_16;
    } else if (zfmt == GNM_Z_32_FLOAT) {
        res.surfacefmt = GNM_IMG_DATA_FORMAT_32;
        res.chantype = GNM_IMG_NUM_FORMAT_FLOAT;
    }
    return res;
}
uint32_t gnmDfGetNumComponents(const GnmDataFormat datafmt) {
switch (datafmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_INVALID:
return 0;
case GNM_IMG_DATA_FORMAT_8:
case GNM_IMG_DATA_FORMAT_16:
case GNM_IMG_DATA_FORMAT_32:
case GNM_IMG_DATA_FORMAT_BC4:
case GNM_IMG_DATA_FORMAT_1:
case GNM_IMG_DATA_FORMAT_1_REVERSED:
return 1;
case GNM_IMG_DATA_FORMAT_8_8:
case GNM_IMG_DATA_FORMAT_16_16:
case GNM_IMG_DATA_FORMAT_32_32:
case GNM_IMG_DATA_FORMAT_8_24:
case GNM_IMG_DATA_FORMAT_24_8:
case GNM_IMG_DATA_FORMAT_X24_8_32:
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S8_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F4:
case GNM_IMG_DATA_FORMAT_FMASK16_S16_F1:
case GNM_IMG_DATA_FORMAT_FMASK16_S8_F2:
case GNM_IMG_DATA_FORMAT_FMASK32_S16_F2:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F4:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F8:
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F4:
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F8:
case GNM_IMG_DATA_FORMAT_4_4:
return 2;
case GNM_IMG_DATA_FORMAT_10_11_11:
case GNM_IMG_DATA_FORMAT_11_11_10:
case GNM_IMG_DATA_FORMAT_32_32_32:
case GNM_IMG_DATA_FORMAT_5_6_5:
case GNM_IMG_DATA_FORMAT_GB_GR:
case GNM_IMG_DATA_FORMAT_BG_RG:
case GNM_IMG_DATA_FORMAT_5_9_9_9:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_6_5_5:
return 3;
case GNM_IMG_DATA_FORMAT_10_10_10_2:
case GNM_IMG_DATA_FORMAT_2_10_10_10:
case GNM_IMG_DATA_FORMAT_8_8_8_8:
case GNM_IMG_DATA_FORMAT_16_16_16_16:
case GNM_IMG_DATA_FORMAT_32_32_32_32:
case GNM_IMG_DATA_FORMAT_1_5_5_5:
case GNM_IMG_DATA_FORMAT_5_5_5_1:
case GNM_IMG_DATA_FORMAT_4_4_4_4:
case GNM_IMG_DATA_FORMAT_BC1:
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
case GNM_IMG_DATA_FORMAT_BC7:
return 4;
default:
UNREACHABLE();
}
}
uint32_t gnmDfGetBitsPerElement(const GnmDataFormat datafmt) {
switch (datafmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_INVALID:
return 0;
case GNM_IMG_DATA_FORMAT_8:
return 8;
case GNM_IMG_DATA_FORMAT_16:
case GNM_IMG_DATA_FORMAT_8_8:
return 16;
case GNM_IMG_DATA_FORMAT_32:
case GNM_IMG_DATA_FORMAT_16_16:
case GNM_IMG_DATA_FORMAT_10_11_11:
case GNM_IMG_DATA_FORMAT_11_11_10:
case GNM_IMG_DATA_FORMAT_10_10_10_2:
case GNM_IMG_DATA_FORMAT_2_10_10_10:
case GNM_IMG_DATA_FORMAT_8_8_8_8:
return 32;
case GNM_IMG_DATA_FORMAT_32_32:
case GNM_IMG_DATA_FORMAT_16_16_16_16:
return 64;
case GNM_IMG_DATA_FORMAT_32_32_32:
return 96;
case GNM_IMG_DATA_FORMAT_32_32_32_32:
return 128;
case GNM_IMG_DATA_FORMAT_5_6_5:
case GNM_IMG_DATA_FORMAT_1_5_5_5:
case GNM_IMG_DATA_FORMAT_5_5_5_1:
case GNM_IMG_DATA_FORMAT_4_4_4_4:
return 16;
case GNM_IMG_DATA_FORMAT_8_24:
case GNM_IMG_DATA_FORMAT_24_8:
return 32;
case GNM_IMG_DATA_FORMAT_X24_8_32:
return 64;
case GNM_IMG_DATA_FORMAT_GB_GR:
case GNM_IMG_DATA_FORMAT_BG_RG:
return 16;
case GNM_IMG_DATA_FORMAT_5_9_9_9:
return 32;
case GNM_IMG_DATA_FORMAT_BC1:
return 4;
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
return 8;
case GNM_IMG_DATA_FORMAT_BC4:
return 4;
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_BC7:
return 8;
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S8_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F4:
return 8;
case GNM_IMG_DATA_FORMAT_FMASK16_S16_F1:
case GNM_IMG_DATA_FORMAT_FMASK16_S8_F2:
return 16;
case GNM_IMG_DATA_FORMAT_FMASK32_S16_F2:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F4:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F8:
return 32;
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F4:
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F8:
return 64;
case GNM_IMG_DATA_FORMAT_4_4:
return 8;
case GNM_IMG_DATA_FORMAT_6_5_5:
return 16;
case GNM_IMG_DATA_FORMAT_1:
case GNM_IMG_DATA_FORMAT_1_REVERSED:
return 1;
default:
UNREACHABLE();
}
}
// Translates the image number format of `datafmt` into the render-target
// number type.
//
// @param datafmt format whose `chantype` is translated
// @param out     receives the matching GnmSurfaceNumber on success
// @return true when a mapping exists; false (with *out untouched) for number
//         formats that have no render-target equivalent.
bool gnmDfGetRtChannelType(const GnmDataFormat datafmt, GnmSurfaceNumber* out) {
    switch (datafmt.chantype) {
    case GNM_IMG_NUM_FORMAT_UNORM:
        *out = GNM_NUMBER_UNORM;
        break;
    case GNM_IMG_NUM_FORMAT_SNORM:
        *out = GNM_NUMBER_SNORM;
        break;
    case GNM_IMG_NUM_FORMAT_UINT:
        // The assignment previously sat above this label (after a break), so it
        // never ran and UINT reported success without writing *out.
        *out = GNM_NUMBER_UINT;
        break;
    case GNM_IMG_NUM_FORMAT_SINT:
        *out = GNM_NUMBER_SINT;
        break;
    case GNM_IMG_NUM_FORMAT_FLOAT:
        *out = GNM_NUMBER_FLOAT;
        break;
    case GNM_IMG_NUM_FORMAT_SRGB:
        *out = GNM_NUMBER_SRGB;
        break;
    default:
        return false;
    }
    return true;
}
// Derives the render-target component swap mode from the channel selectors of
// `datafmt`.
//
// For each component count the four recognised selector layouts map, in order,
// to STD, ALT, STD_REV and ALT_REV.
//
// @param datafmt format whose channel selectors are inspected
// @param out     receives the swap mode on success
// @return true when the selectors match a known layout; false otherwise
//         (*out is left untouched).
bool gnmDfGetRtChannelOrder(const GnmDataFormat datafmt, GnmSurfaceSwap* out) {
    const uint32_t numcomps = gnmDfGetNumComponents(datafmt);
    const GnmChannel cx = datafmt.chanx;
    const GnmChannel cy = datafmt.chany;
    const GnmChannel cz = datafmt.chanz;
    const GnmChannel cw = datafmt.chanw;
    if (numcomps == 1) {
        if (cx == GNM_CHAN_X) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cy == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cz == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cw == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    } else if (numcomps == 2) {
        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cx == GNM_CHAN_X && cw == GNM_CHAN_Y) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cx == GNM_CHAN_Y && cy == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cx == GNM_CHAN_Y && cw == GNM_CHAN_X) {
            // Reversed form of the ALT layout above; this branch used to
            // report STD_REV, duplicating the previous case.
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    } else if (numcomps == 3) {
        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cz == GNM_CHAN_Z) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cw == GNM_CHAN_Z) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cz == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cw == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    } else if (numcomps == 4) {
        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cz == GNM_CHAN_Z && cw == GNM_CHAN_W) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cz == GNM_CHAN_X && cw == GNM_CHAN_W) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cx == GNM_CHAN_W && cy == GNM_CHAN_Z && cz == GNM_CHAN_Y && cw == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cx == GNM_CHAN_Y && cy == GNM_CHAN_Z && cz == GNM_CHAN_W && cw == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    }
    // Zero components or an unrecognised selector layout.
    return false;
}
// Maps a surface format to the depth format it corresponds to, or
// GNM_Z_INVALID when the surface format is not one of 16, 24_8 or 32.
GnmZFormat gnmDfGetZFormat(const GnmDataFormat datafmt) {
    const GnmImageFormat surfacefmt = datafmt.surfacefmt;
    if (surfacefmt == GNM_IMG_DATA_FORMAT_16) {
        return GNM_Z_16;
    }
    if (surfacefmt == GNM_IMG_DATA_FORMAT_24_8) {
        return GNM_Z_24;
    }
    if (surfacefmt == GNM_IMG_DATA_FORMAT_32) {
        return GNM_Z_32_FLOAT;
    }
    return GNM_Z_INVALID;
}
// Maps a surface format to its stencil format: only the 8-bit format is a
// valid stencil, everything else is GNM_STENCIL_INVALID.
GnmStencilFormat gnmDfGetStencilFormat(const GnmDataFormat datafmt) {
    const bool is_eight_bit = datafmt.surfacefmt == GNM_IMG_DATA_FORMAT_8;
    return is_eight_bit ? GNM_STENCIL_8 : GNM_STENCIL_INVALID;
}

View File

@ -0,0 +1,409 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include "common/types.h"
// Render-target number types; this is the type gnmDfGetRtChannelType writes
// to its output parameter. Values 0x2 and 0x3 are not defined here.
enum GnmSurfaceNumber {
GNM_NUMBER_UNORM = 0x0,
GNM_NUMBER_SNORM = 0x1,
GNM_NUMBER_UINT = 0x4,
GNM_NUMBER_SINT = 0x5,
GNM_NUMBER_SRGB = 0x6,
GNM_NUMBER_FLOAT = 0x7,
};
// Surface data formats, stored in the 8-bit `surfacefmt` field of
// GnmDataFormat. The numbering is not contiguous: 0xf, 0x17-0x1f and
// 0x2a-0x2b have no enumerator.
enum GnmImageFormat {
GNM_IMG_DATA_FORMAT_INVALID = 0x0,
GNM_IMG_DATA_FORMAT_8 = 0x1,
GNM_IMG_DATA_FORMAT_16 = 0x2,
GNM_IMG_DATA_FORMAT_8_8 = 0x3,
GNM_IMG_DATA_FORMAT_32 = 0x4,
GNM_IMG_DATA_FORMAT_16_16 = 0x5,
GNM_IMG_DATA_FORMAT_10_11_11 = 0x6,
GNM_IMG_DATA_FORMAT_11_11_10 = 0x7,
GNM_IMG_DATA_FORMAT_10_10_10_2 = 0x8,
GNM_IMG_DATA_FORMAT_2_10_10_10 = 0x9,
GNM_IMG_DATA_FORMAT_8_8_8_8 = 0xa,
GNM_IMG_DATA_FORMAT_32_32 = 0xb,
GNM_IMG_DATA_FORMAT_16_16_16_16 = 0xc,
GNM_IMG_DATA_FORMAT_32_32_32 = 0xd,
GNM_IMG_DATA_FORMAT_32_32_32_32 = 0xe,
GNM_IMG_DATA_FORMAT_5_6_5 = 0x10,
GNM_IMG_DATA_FORMAT_1_5_5_5 = 0x11,
GNM_IMG_DATA_FORMAT_5_5_5_1 = 0x12,
GNM_IMG_DATA_FORMAT_4_4_4_4 = 0x13,
GNM_IMG_DATA_FORMAT_8_24 = 0x14,
GNM_IMG_DATA_FORMAT_24_8 = 0x15,
GNM_IMG_DATA_FORMAT_X24_8_32 = 0x16,
GNM_IMG_DATA_FORMAT_GB_GR = 0x20,
GNM_IMG_DATA_FORMAT_BG_RG = 0x21,
GNM_IMG_DATA_FORMAT_5_9_9_9 = 0x22,
// Block-compressed formats (see gnmDfIsBlockCompressed).
GNM_IMG_DATA_FORMAT_BC1 = 0x23,
GNM_IMG_DATA_FORMAT_BC2 = 0x24,
GNM_IMG_DATA_FORMAT_BC3 = 0x25,
GNM_IMG_DATA_FORMAT_BC4 = 0x26,
GNM_IMG_DATA_FORMAT_BC5 = 0x27,
GNM_IMG_DATA_FORMAT_BC6 = 0x28,
GNM_IMG_DATA_FORMAT_BC7 = 0x29,
// FMASK formats, named FMASK<bits>_S<samples>_F<fragments> as selected by
// gnmDfInitFromFmask.
GNM_IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c,
GNM_IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d,
GNM_IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e,
GNM_IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f,
GNM_IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30,
GNM_IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31,
GNM_IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32,
GNM_IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33,
GNM_IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34,
GNM_IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35,
GNM_IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36,
GNM_IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37,
GNM_IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38,
GNM_IMG_DATA_FORMAT_4_4 = 0x39,
GNM_IMG_DATA_FORMAT_6_5_5 = 0x3a,
GNM_IMG_DATA_FORMAT_1 = 0x3b,
GNM_IMG_DATA_FORMAT_1_REVERSED = 0x3c,
// NOTE(review): the three *_AS_* formats are not handled by
// gnmDfGetNumComponents / gnmDfGetBitsPerElement and hit UNREACHABLE() there.
GNM_IMG_DATA_FORMAT_32_AS_8 = 0x3d,
GNM_IMG_DATA_FORMAT_32_AS_8_8 = 0x3e,
GNM_IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f,
};
// Image number formats, stored in the 4-bit `chantype` field of GnmDataFormat.
// Only UNORM, SNORM, UINT, SINT, FLOAT and SRGB are accepted by
// gnmDfGetRtChannelType. Value 0x8 has no enumerator.
enum GnmImgNumFormat {
GNM_IMG_NUM_FORMAT_UNORM = 0x0,
GNM_IMG_NUM_FORMAT_SNORM = 0x1,
GNM_IMG_NUM_FORMAT_USCALED = 0x2,
GNM_IMG_NUM_FORMAT_SSCALED = 0x3,
GNM_IMG_NUM_FORMAT_UINT = 0x4,
GNM_IMG_NUM_FORMAT_SINT = 0x5,
GNM_IMG_NUM_FORMAT_SNORM_OGL = 0x6,
GNM_IMG_NUM_FORMAT_FLOAT = 0x7,
GNM_IMG_NUM_FORMAT_SRGB = 0x9,
GNM_IMG_NUM_FORMAT_UBNORM = 0xa,
GNM_IMG_NUM_FORMAT_UBNORM_OGL = 0xb,
GNM_IMG_NUM_FORMAT_UBINT = 0xc,
GNM_IMG_NUM_FORMAT_UBSCALED = 0xd,
};
// Depth formats; converted to and from GnmDataFormat by gnmDfInitFromZ and
// gnmDfGetZFormat.
enum GnmZFormat {
GNM_Z_INVALID = 0x0,
GNM_Z_16 = 0x1,
GNM_Z_24 = 0x2,
GNM_Z_32_FLOAT = 0x3,
};
// Stencil formats; converted to and from GnmDataFormat by gnmDfInitFromStencil
// and gnmDfGetStencilFormat.
enum GnmStencilFormat {
GNM_STENCIL_INVALID = 0x0,
GNM_STENCIL_8 = 0x1,
};
// Per-channel selector stored in the 3-bit chanx/chany/chanz/chanw fields of
// GnmDataFormat: either a constant (0 or 1) or one of the X/Y/Z/W components.
// Values 0x2 and 0x3 have no enumerator.
enum GnmChannel {
GNM_CHAN_CONSTANT0 = 0x0,
GNM_CHAN_CONSTANT1 = 0x1,
GNM_CHAN_X = 0x4,
GNM_CHAN_Y = 0x5,
GNM_CHAN_Z = 0x6,
GNM_CHAN_W = 0x7,
};
// Render-target component swap modes; this is the type gnmDfGetRtChannelOrder
// writes to its output parameter.
enum GnmSurfaceSwap {
GNM_SWAP_STD = 0x0,
GNM_SWAP_ALT = 0x1,
GNM_SWAP_STD_REV = 0x2,
GNM_SWAP_ALT_REV = 0x3,
};
// Packed description of a data format: surface format, number format and one
// selector per output channel. The bitfields total 32 bits
// (8 + 4 + 3 + 3 + 3 + 3 + 8) and alias `asuint`; the static_assert below
// pins the overall size to 4 bytes.
union GnmDataFormat {
struct {
GnmImageFormat surfacefmt : 8;
GnmImgNumFormat chantype : 4;
GnmChannel chanx : 3;
GnmChannel chany : 3;
GnmChannel chanz : 3;
GnmChannel chanw : 3;
uint32_t _unused : 8;
};
// The same 32 bits viewed as one integer.
uint32_t asuint;
};
static_assert(sizeof(GnmDataFormat) == 0x4, "");
GnmDataFormat gnmDfInitFromFmask(uint32_t numsamples, uint32_t numfrags);
GnmDataFormat gnmDfInitFromZ(GnmZFormat zfmt);
// Builds the data format for a stencil surface: GNM_STENCIL_8 becomes the
// 8-bit surface format (anything else INVALID), the caller's number format is
// used as-is, and all four channels select X.
static inline GnmDataFormat gnmDfInitFromStencil(GnmStencilFormat stencilfmt,
                                                 GnmImgNumFormat chantype) {
    GnmImageFormat surfacefmt = GNM_IMG_DATA_FORMAT_INVALID;
    if (stencilfmt == GNM_STENCIL_8) {
        surfacefmt = GNM_IMG_DATA_FORMAT_8;
    }
    GnmDataFormat res = {
        .surfacefmt = surfacefmt,
        .chantype = chantype,
        .chanx = GNM_CHAN_X,
        .chany = GNM_CHAN_X,
        .chanz = GNM_CHAN_X,
        .chanw = GNM_CHAN_X,
    };
    return res;
}
static inline uint32_t gnmDfGetTexelsPerElement(const GnmDataFormat datafmt) {
switch (datafmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_BC1:
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
case GNM_IMG_DATA_FORMAT_BC4:
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_BC7:
return 16;
case GNM_IMG_DATA_FORMAT_1:
case GNM_IMG_DATA_FORMAT_1_REVERSED:
return 8;
default:
return 1;
}
}
uint32_t gnmDfGetNumComponents(const GnmDataFormat datafmt);
uint32_t gnmDfGetBitsPerElement(const GnmDataFormat datafmt);
// Total bits of one element: bits per element multiplied by the number of
// texels that element covers.
static inline uint32_t gnmDfGetTotalBitsPerElement(const GnmDataFormat fmt) {
    const uint32_t bits = gnmDfGetBitsPerElement(fmt);
    return bits * gnmDfGetTexelsPerElement(fmt);
}
// Bits per element converted to whole bytes (rounded down).
static inline uint32_t gnmDfGetBytesPerElement(const GnmDataFormat datafmt) {
    const uint32_t bits = gnmDfGetBitsPerElement(datafmt);
    return bits >> 3;
}
// Total bits per element converted to whole bytes (rounded down).
static inline uint32_t gnmDfGetTotalBytesPerElement(const GnmDataFormat fmt) {
    const uint32_t total_bits = gnmDfGetTotalBitsPerElement(fmt);
    return total_bits >> 3;
}
static inline bool gnmDfIsBlockCompressed(const GnmDataFormat datafmt) {
switch (datafmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_BC1:
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
case GNM_IMG_DATA_FORMAT_BC4:
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_BC7:
return true;
default:
return false;
}
}
bool gnmDfGetRtChannelType(const GnmDataFormat datafmt, GnmSurfaceNumber* out);
bool gnmDfGetRtChannelOrder(const GnmDataFormat datafmt, GnmSurfaceSwap* out);
GnmZFormat gnmDfGetZFormat(const GnmDataFormat datafmt);
GnmStencilFormat gnmDfGetStencilFormat(const GnmDataFormat datafmt);
static inline uint32_t gnmDfGetTexelsPerElementWide(const GnmDataFormat fmt) {
switch (fmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_BC1:
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
case GNM_IMG_DATA_FORMAT_BC4:
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_BC7:
return 4;
case GNM_IMG_DATA_FORMAT_1:
case GNM_IMG_DATA_FORMAT_1_REVERSED:
return 8;
case GNM_IMG_DATA_FORMAT_GB_GR:
case GNM_IMG_DATA_FORMAT_BG_RG:
return 2;
default:
return 1;
}
}
static inline uint32_t gnmDfGetTexelsPerElementTall(const GnmDataFormat fmt) {
switch (fmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_BC1:
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
case GNM_IMG_DATA_FORMAT_BC4:
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_BC7:
return 4;
default:
return 1;
}
}
// Predefined GnmDataFormat values. Each pairs a surface format with a number
// format and one channel selector per output channel; unused channels select
// a constant (0 or 1).
static const GnmDataFormat GNM_FMT_INVALID = {
.surfacefmt = GNM_IMG_DATA_FORMAT_INVALID,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_CONSTANT0,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT0,
};
static const GnmDataFormat GNM_FMT_R8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
// Single 8-bit channel routed to W; X/Y/Z read constant 0.
static const GnmDataFormat GNM_FMT_A8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_CONSTANT0,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_X,
};
static const GnmDataFormat GNM_FMT_R8G8B8A8_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R8G8B8A8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R8G8B8A8_UINT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_UINT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
// The B8G8R8A8 variants swap the X and Z selectors relative to R8G8B8A8.
static const GnmDataFormat GNM_FMT_B8G8R8A8_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_Z,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_X,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_B8G8R8A8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_Z,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_X,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R16_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_16,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_R16G16B16A16_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_16_16_16_16,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R16G16B16A16_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_16_16_16_16,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_R32G32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
// NOTE(review): chanw is CONSTANT0 here while GNM_FMT_R32G32B32_FLOAT below
// uses CONSTANT1 — confirm whether the difference is intentional.
static const GnmDataFormat GNM_FMT_R32G32B32_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT0,
};
static const GnmDataFormat GNM_FMT_R32G32B32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_R32G32B32A32_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R32G32B32A32_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R32G32B32A32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
// Block-compressed presets.
static const GnmDataFormat GNM_FMT_BC6_SNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC6,
.chantype = GNM_IMG_NUM_FORMAT_SNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_BC6_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC6,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_BC7_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC7,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_BC7_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC7,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};

View File

@ -0,0 +1,82 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h"
// Computes the byte address of an element inside a linear surface.
// The element index is (slice + sample * numSlices) slices of pitch * height
// elements, plus y rows of `pitch` elements, plus x; it is scaled by `bpp`
// to a bit address, which is then split into a byte address (returned) and a
// bit position within that byte (optionally written to pBitPosition).
uint64_t gpaComputeSurfaceAddrFromCoordLinear(
    uint32_t x,            ///< [in] x coord
    uint32_t y,            ///< [in] y coord
    uint32_t slice,        ///< [in] slice/depth index
    uint32_t sample,       ///< [in] sample index
    uint32_t bpp,          ///< [in] bits per pixel
    uint32_t pitch,        ///< [in] pitch
    uint32_t height,       ///< [in] height
    uint32_t numSlices,    ///< [in] number of slices
    uint32_t* pBitPosition ///< [out] bit position inside a byte
) {
    const uint64_t elems_per_slice = static_cast<uint64_t>(pitch) * height;
    const uint64_t slice_base = (slice + sample * numSlices) * elems_per_slice;
    const uint64_t row_base = static_cast<uint64_t>(y) * pitch;
    const uint64_t bit_addr = (slice_base + row_base + x) * bpp;

    if (pBitPosition != nullptr) {
        *pBitPosition = static_cast<uint32_t>(bit_addr & 7);
    }
    return bit_addr >> 3;
}
// Computes the size of one surface (mip level + array slice) of a texture and
// its byte offset from the start of the texture data.
//
// Every mip level from 0 up to `miplevel` is evaluated with
// gpaComputeSurfaceInfo; each earlier level contributes
// numslices * surfacesize to the offset, and the requested slice adds
// arrayslice * surfacesize within the final level.
//
// @param outsize    optional; receives the surface size of the requested level
// @param outoffset  optional; receives the offset of the requested surface
// @param tex        texture description (must not be null)
// @param miplevel   mip level of interest
// @param arrayslice array slice of interest
// @return GPA_ERR_OK on success, GPA_ERR_INVALID_ARGS when `tex` is null, or
//         the error reported by gpaTpInit / gpaComputeSurfaceInfo.
GpaError gpaCalcSurfaceSizeOffset(uint64_t* outsize, uint64_t* outoffset, const GpaTextureInfo* tex,
                                  uint32_t miplevel, uint32_t arrayslice) {
    if (!tex) {
        return GPA_ERR_INVALID_ARGS;
    }

    const uint32_t numarrayslices = tex->numslices;
    const uint32_t basewidth = tex->width;
    const uint32_t baseheight = tex->height;
    const uint32_t basedepth = tex->depth;
    const uint32_t basepitch = tex->pitch;

    GpaTilingParams tp = {};
    GpaError res = gpaTpInit(&tp, tex, 0, arrayslice);
    if (res != GPA_ERR_OK) {
        return res;
    }

    GpaSurfaceInfo si = {0};
    // Accumulate in 64 bits: the results are reported through uint64_t
    // outputs, and 32-bit accumulators would silently wrap for large textures.
    uint64_t finaloffset = 0;
    uint64_t finalsize = 0;
    for (uint32_t m = 0; m <= miplevel; m += 1) {
        // Skip over all slices of the previous level (zero on the first pass).
        finaloffset += numarrayslices * finalsize;

        tp.linearwidth = std::max(basewidth >> m, 1U);
        tp.linearheight = std::max(baseheight >> m, 1U);
        tp.lineardepth = basedepth;
        tp.basetiledpitch = basepitch;
        tp.miplevel = m;

        res = gpaComputeSurfaceInfo(&si, &tp);
        if (res != GPA_ERR_OK) {
            return res;
        }
        finalsize = si.surfacesize;
    }
    finaloffset += static_cast<uint64_t>(si.surfacesize) * arrayslice;

    if (outsize) {
        *outsize = finalsize;
    }
    if (outoffset) {
        *outoffset = finaloffset;
    }
    return GPA_ERR_OK;
}

View File

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "video_core/amdgpu/gpuaddr/error.h"
// Returns a human-readable description of a GpaError, or an empty string for
// values that are not part of the enum.
std::string_view gpaStrError(const GpaError err) {
    std::string_view message = "";
    switch (err) {
    case GPA_ERR_OK:
        message = "No error";
        break;
    case GPA_ERR_INVALID_ARGS:
        message = "An invalid argument was used";
        break;
    case GPA_ERR_OVERFLOW:
        message = "A buffer has overflown";
        break;
    case GPA_ERR_TILING_ERROR:
        message = "An internal tiling error occured";
        break;
    case GPA_ERR_UNSUPPORTED:
        message = "A requested feature is unsupported";
        break;
    case GPA_ERR_INTERNAL_ERROR:
        message = "An internal error occured";
        break;
    case GPA_ERR_NOT_COMPRESSED:
        message = "The texture is not compressed";
        break;
    default:
        break;
    }
    return message;
}

View File

@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include <string_view>
// Result codes returned by the gpa* functions. GPA_ERR_OK is 0; the remaining
// values follow sequentially. gpaStrError provides a description for each.
enum GpaError {
GPA_ERR_OK = 0,
GPA_ERR_INVALID_ARGS,
GPA_ERR_OVERFLOW,
GPA_ERR_TILING_ERROR,
GPA_ERR_UNSUPPORTED,
GPA_ERR_INTERNAL_ERROR,
GPA_ERR_NOT_COMPRESSED,
};
std::string_view gpaStrError(const GpaError err);

View File

@ -0,0 +1,74 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include "video_core/amdgpu/gpuaddr/error.h"
#include "video_core/amdgpu/gpuaddr/types.h"
//
// Surface
//
GpaError gpaComputeSurfaceInfo(GpaSurfaceInfo* out, const GpaTilingParams* tp);
GpaError gpaComputeHtileInfo(GpaHtileInfo* outinfo, const GpaHtileParams* params);
GpaError gpaComputeCmaskInfo(GpaCmaskInfo* outinfo, const GpaCmaskParams* params);
GpaError gpaComputeFmaskInfo(GpaFmaskInfo* outinfo, const GpaFmaskParams* params);
GpaError gpaComputeSurfaceTileMode(GnmTileMode* outtilemode, GnmGpuMode mingpumode,
GnmArrayMode arraymode, GpaSurfaceFlags flags,
GnmDataFormat surfacefmt, u32 numfragsperpixel,
GnmMicroTileMode mtm);
//
// Surface generation
//
GpaError gpaFindOptimalSurface(GpaSurfaceProperties* outprops, GpaSurfaceType surfacetype, u32 bpp,
u32 numfrags, bool mipmapped, GnmGpuMode mingpumode);
//
// Element/Utility
//
uint64_t gpaComputeSurfaceAddrFromCoordLinear(u32 x, u32 y, u32 slice, u32 sample, u32 bpp,
u32 pitch, u32 height, u32 numSlices,
u32* pBitPosition);
GpaError gpaCalcSurfaceSizeOffset(uint64_t* outsize, uint64_t* outoffset, const GpaTextureInfo* tex,
u32 miplevel, u32 arrayslice);
GpaError gpaGetTileInfo(GpaTileInfo* outinfo, GnmTileMode tilemode, u32 bpp, u32 numfrags,
GnmGpuMode gpumode);
GpaError gpaComputeBaseSwizzle(u32* outswizzle, GnmTileMode tilemode, u32 surfindex, u32 bpp,
u32 numfrags, GnmGpuMode gpumode);
//
// Decompression
//
GpaError gpaGetDecompressedSize(uint64_t* outsize, const void* inbuffer, size_t inbuffersize,
const GpaTextureInfo* texinfo);
GpaError gpaDecompressTexture(void* outbuffer, uint64_t outbuffersize, const void* inbuffer,
uint64_t inbuffersize, const GpaTextureInfo* texinfo,
GnmDataFormat* outfmt);
//
// Tiler
//
GpaError gpaTpInit(GpaTilingParams* tp, const GpaTextureInfo* tex, u32 miplevel, u32 arrayslice);
GpaError gpaTileSurface(void* outtile, size_t outtilesize, const void* inuntile,
size_t inuntilesize, const GpaTilingParams* tp);
GpaError gpaTileSurfaceRegion(void* outtile, size_t outtilesize, const void* inuntile,
size_t inuntilesize, const GpaTilingParams* tp,
const GpaSurfaceRegion* region, u32 srcpitch, u32 srcslicepitch);
GpaError gpaTileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo, u32 mip,
u32 slice);
GpaError gpaTileTextureAll(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo);
GpaError gpaDetileSurface(void* outuntile, size_t outuntilesize, const void* intile,
size_t intilesize, const GpaTilingParams* tp);
GpaError gpaDetileSurfaceRegion(void* outuntile, size_t outuntilesize, const void* intile,
size_t intilesize, const GpaTilingParams* tp,
const GpaSurfaceRegion* region, u32 dstpitch, u32 dstslicepitch);
GpaError gpaDetileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo, u32 mip,
u32 slice);
GpaError gpaDetileTextureAll(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo);

View File

@ -0,0 +1,145 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include "video_core/amdgpu/gpuaddr/gpuaddr.h"
// Fixed tiling parameters shared by the address computation code.
constexpr u32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling
constexpr u32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling
constexpr u32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes
constexpr u32 XThickTileThickness = 8; ///< Extra thick tiling thickness
constexpr u32 PowerSaveTileBytes = 64; ///< Number of bytes per tile for power save 64
constexpr u32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache
constexpr u32 CmaskElemBits = 4; ///< Number of bits for CMASK element
constexpr u32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32
// Number of pixels in one micro tile (width * height).
constexpr u32 MicroTilePixels = MicroTileWidth * MicroTileHeight;
// 64 KiB block size; PRT tiles use the same size.
constexpr u32 Block64K = 0x10000;
constexpr u32 PrtTileSize = Block64K;
// NOTE(review): the constants below carry no units in this file; their names
// suggest memory-interleave settings — confirm against their users.
constexpr u32 PIPE_INTERLEAVE_BYTES = 256;
constexpr u32 BANK_INTERLEAVE = 1;
constexpr u32 BLOCK_SIZE = 4;
constexpr u32 MICROTILE_SIZE = 8;
constexpr u32 TILE_SIZE = 8;
constexpr u32 DRAM_ROW_SIZE = 1024;
// Quick base-2 logarithm for the powers of two 1, 2, 4, 8 and 16.
// Every other input (including 0 and values above 16) yields 0.
static inline uint32_t QLog2(uint32_t x) {
    if (x == 2) {
        return 1;
    }
    if (x == 4) {
        return 2;
    }
    if (x == 8) {
        return 3;
    }
    if (x == 16) {
        return 4;
    }
    return 0;
}
/// Tells whether `x` is a power of two.
///
/// @return true when exactly one bit of `x` is set; false otherwise (0 is not a power of two).
static inline bool IsPow2(const uint32_t x) {
    if (x == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when a single bit was set.
    const uint32_t without_lowest_bit = x & (x - 1);
    return without_lowest_bit == 0;
}
/// Rounds `x` up to the nearest power of two.
///
/// @return `x` itself when it already is a power of two, otherwise the next larger one.
///         Because the arithmetic is modulo 2^32, inputs of 0 or above 0x80000000 yield 0.
static inline uint32_t NextPow2(uint32_t x) {
    // Smear the highest set bit of (x - 1) into every lower position, then add one.
    uint32_t smeared = x - 1;
    for (uint32_t shift = 1; shift <= 16; shift *= 2) {
        smeared |= smeared >> shift;
    }
    return smeared + 1;
}
/// Rounds `x` up to a multiple of `align`.
///
/// @param x     Value to align.
/// @param align Alignment; the mask arithmetic only gives a meaningful result when this is a
///              power of two.
/// @return The smallest multiple of `align` that is >= `x` (modulo 2^32).
static inline uint32_t PowTwoAlign32(uint32_t x, uint32_t align) {
    const uint32_t low_mask = align - 1;
    return (x + low_mask) & ~low_mask;
}
/// Converts a bit count to the number of bytes needed to hold it (rounding up).
///
/// @param x Number of bits.
/// @return ceil(x / 8).
static inline uint32_t BitsToBytes32(uint32_t x) {
    // Divide first and add the remainder carry afterwards: the usual `(x + 7) / 8`
    // wraps around for inputs within 7 of UINT32_MAX and would return ~0 bytes.
    return x / 8 + ((x % 8) != 0 ? 1u : 0u);
}
/// Converts a 64-bit bit count to the number of bytes needed to hold it (rounding up).
///
/// @param x Number of bits.
/// @return ceil(x / 8).
static inline uint64_t BitsToBytes64(uint64_t x) {
    // Divide first and add the remainder carry afterwards: the usual `(x + 7) / 8`
    // wraps around for inputs within 7 of UINT64_MAX and would return ~0 bytes.
    return x / 8 + ((x % 8) != 0 ? 1u : 0u);
}
/// Converts a byte count to the equivalent number of bits.
///
/// @param x Number of bytes.
/// @return `x` multiplied by 8, modulo 2^32.
static inline uint32_t BytesToBits32(uint32_t x) {
    // Multiplying by eight is a left shift by three.
    return x << 3;
}
/// Converts a 64-bit byte count to the equivalent number of bits.
///
/// @param x Number of bytes.
/// @return `x` multiplied by 8, modulo 2^64.
static inline uint64_t BytesToBits64(uint64_t x) {
    // Multiplying by eight is a left shift by three.
    return x << 3;
}
// Tile mode table lookups. Each of these is a switch over the tile mode that calls
// abort() for a value it does not list.
// Array mode of a tile mode.
GnmArrayMode gpaGetArrayMode(GnmTileMode tilemode);
// Micro tile mode (depth / display / thin / thick) of a tile mode.
GnmMicroTileMode gpaGetMicroTileMode(GnmTileMode tilemode);
// Pipe configuration of a tile mode; gpaGetTileInfo uses this when the GPU mode is not NEO.
GnmPipeConfig gpaGetPipeConfig(GnmTileMode tilemode);
// Pipe configuration of a tile mode; gpaGetTileInfo uses this for GNM_GPU_NEO.
GnmPipeConfig gpaGetAltPipeConfig(GnmTileMode tilemode);
// Sample split setting of a tile mode.
GnmSampleSplit gpaGetSampleSplit(GnmTileMode tilemode);
// Tile split setting of a tile mode.
GnmTileSplit gpaGetTileSplit(GnmTileMode tilemode);
// Computes the macro tile mode for a macro-tiled tile mode. Returns GPA_ERR_INVALID_ARGS for
// a null output pointer, a fragment count that is not a power of two <= 16, a bit count
// outside 1..128, or a tile mode that is not macro tiled.
GpaError gpaCalcSurfaceMacrotileMode(GnmMacroTileMode* outmtm, GnmTileMode tilemode,
uint32_t bitsperelem, uint32_t numfragsperpixel);
// Picks the tile mode that pairs the micro tile mode of `oldtilemode` with `newarraymode`.
GpaError gpaAdjustTileMode(GnmTileMode* outtilemode, GnmTileMode oldtilemode,
GnmArrayMode newarraymode);
// Array mode queries. Each aborts on an array mode it does not list.
// Micro tile thickness: 1 for linear/THIN1 modes, 4 for THICK modes, 8 for XTHICK modes.
uint32_t gpaGetMicroTileThickness(GnmArrayMode arraymode);
// True for LINEAR_GENERAL and LINEAR_ALIGNED.
bool gpaIsLinear(GnmArrayMode arraymode);
// True for the 1D tiled modes (THIN1 and THICK).
bool gpaIsMicroTiled(GnmArrayMode arraymode);
// True for every array mode that is neither linear nor 1D tiled.
bool gpaIsMacroTiled(GnmArrayMode arraymode);
// True for the PRT array modes.
bool gpaIsPrt(GnmArrayMode arraymode);
// Bank width of a macro tile mode; used by gpaGetTileInfo for both GPU modes.
GnmBankWidth gpaGetBankWidth(GnmMacroTileMode mtm);
//
// BASE mode macrotilemode stuff
//
GnmBankHeight gpaGetBankHeight(GnmMacroTileMode mtm);
GnmNumBanks gpaGetNumBanks(GnmMacroTileMode mtm);
GnmMacroTileAspect gpaGetMacrotileAspect(GnmMacroTileMode mtm);
//
// NEO mode macrotilemode stuff
//
GnmBankHeight gpaGetAltBankHeight(GnmMacroTileMode mtm);
GnmNumBanks gpaGetAltNumBanks(GnmMacroTileMode mtm);
GnmMacroTileAspect gpaGetAltMacrotileAspect(GnmMacroTileMode mtm);
// Number of pipes (2, 8 or 16) for a pipe configuration; aborts on an unlisted value.
uint32_t gpaGetPipeCount(GnmPipeConfig pipecfg);
/// Computes BLOCK_SIZE (4) times the format's total bytes per element, divided by its texels
/// per element (wide).
/// NOTE(review): presumably the byte pitch of one row of a 4-texel-wide block - confirm.
///
/// @param fmt Data format queried through the gnmDf* helpers.
static inline uint32_t getblockpitch(const GnmDataFormat fmt) {
    const uint32_t elem_bytes = gnmDfGetTotalBytesPerElement(fmt);
    const uint32_t elem_texels_wide = gnmDfGetTexelsPerElementWide(fmt);
    const uint32_t scaled_bytes = BLOCK_SIZE * elem_bytes;
    return scaled_bytes / elem_texels_wide;
}
/// Computes TILE_SIZE (8) times the format's total bytes per element, divided by its texels
/// per element (wide).
/// NOTE(review): presumably the byte pitch of one row of an 8-texel-wide tile - confirm.
///
/// @param fmt Data format queried through the gnmDf* helpers.
static inline uint32_t gettilepitch(const GnmDataFormat fmt) {
    const uint32_t elem_bytes = gnmDfGetTotalBytesPerElement(fmt);
    const uint32_t elem_texels_wide = gnmDfGetTexelsPerElementWide(fmt);
    const uint32_t scaled_bytes = TILE_SIZE * elem_bytes;
    return scaled_bytes / elem_texels_wide;
}
/// Number of elements that fit across BLOCK_SIZE (4) texels horizontally.
///
/// @param fmt Data format queried through gnmDfGetTexelsPerElementWide.
/// @return BLOCK_SIZE divided (integer division) by the format's texels per element (wide).
static inline uint32_t getelemsperblockwide(const GnmDataFormat fmt) {
    const uint32_t texels_per_elem = gnmDfGetTexelsPerElementWide(fmt);
    const uint32_t elems_across = BLOCK_SIZE / texels_per_elem;
    return elems_across;
}
/// Number of elements that fit across BLOCK_SIZE (4) texels vertically.
///
/// @param fmt Data format queried through gnmDfGetTexelsPerElementTall.
/// @return BLOCK_SIZE divided (integer division) by the format's texels per element (tall).
static inline uint32_t getelemsperblocktall(const GnmDataFormat fmt) {
    const uint32_t texels_per_elem = gnmDfGetTexelsPerElementTall(fmt);
    const uint32_t elems_down = BLOCK_SIZE / texels_per_elem;
    return elems_down;
}
/// Computes a tile split size in bytes, never less than 256.
///
/// @param split     Tile split enumerator; used as a shift amount applied to 64.
/// @param bpp       Bits per element.
/// @param thickness Micro tile thickness.
/// @return max(256, (64 << split) * bytes of one single-sample micro tile).
static inline uint32_t GetTileSplitBytes(GnmTileSplit split, uint32_t bpp, uint32_t thickness) {
    // Size of one micro tile holding a single sample per pixel.
    const uint32_t single_sample_tile_bits = bpp * MicroTilePixels * thickness;
    const uint32_t single_sample_tile_bytes = BitsToBytes32(single_sample_tile_bits);
    // Non-depth entries store a split factor
    const uint32_t split_factor = 64 << split;
    const uint32_t split_bytes = split_factor * single_sample_tile_bytes;
    return std::max(256u, split_bytes);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,203 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h"
/// Chooses surface flags and a tile mode for the requested surface type.
///
/// @param outprops    Receives the chosen tile mode and flags. Must not be null.
/// @param surfacetype Kind of surface (color, depth, texture, ...).
/// @param bpp         Bits per element; only used to size depth tiles.
/// @param numfrags    Fragments per pixel. For depth/stencil surfaces that are not both a
///                    depth target and texture compatible it must be 1, 2, 4 or 8.
/// @param mipmapped   Sets the pow2pad flag for texture surface types.
/// @param mingpumode  GNM_GPU_NEO enables the texcompatible flag for textures and raises the
///                    minimum depth-only tile mode.
/// @return GPA_ERR_OK on success; GPA_ERR_INVALID_ARGS for a null `outprops`, an unknown
///         surface type or an unsupported fragment count; GPA_ERR_INTERNAL_ERROR for rotated
///         micro tiling.
GpaError gpaFindOptimalSurface(GpaSurfaceProperties* outprops, GpaSurfaceType surfacetype,
                               uint32_t bpp, uint32_t numfrags, bool mipmapped,
                               GnmGpuMode mingpumode) {
    if (!outprops) {
        return GPA_ERR_INVALID_ARGS;
    }
    /* Translate the surface type into flags. */
    GpaSurfaceFlags flags = {0};
    switch (surfacetype) {
    case GPA_SURFACE_COLORDISPLAY:
        flags.display = 1;
        break;
    case GPA_SURFACE_COLOR:
        break;
    case GPA_SURFACE_DEPTHSTENCIL:
        flags.depthtarget = 1;
        flags.stenciltarget = 1;
        break;
    case GPA_SURFACE_DEPTH:
        flags.depthtarget = 1;
        break;
    case GPA_SURFACE_STENCIL:
        flags.stenciltarget = 1;
        break;
    case GPA_SURFACE_FMASK:
        flags.fmask = 1;
        break;
    case GPA_SURFACE_TEXTUREFLAT:
    case GPA_SURFACE_RWTEXTUREFLAT:
        flags.pow2pad = mipmapped;
        flags.texcompatible = mingpumode == GNM_GPU_NEO;
        break;
    case GPA_SURFACE_TEXTUREVOLUME:
    case GPA_SURFACE_RWTEXTUREVOLUME:
        flags.volume = 1;
        flags.pow2pad = mipmapped;
        flags.texcompatible = mingpumode == GNM_GPU_NEO;
        break;
    case GPA_SURFACE_TEXTURECUBEMAP:
    case GPA_SURFACE_RWTEXTURECUBEMAP:
        flags.cube = 1;
        flags.pow2pad = mipmapped;
        flags.texcompatible = mingpumode == GNM_GPU_NEO;
        break;
    default:
        return GPA_ERR_INVALID_ARGS;
    }
    /* Set the requested tiling mode. */
    // NOTE(review): flags.prt is never assigned in this function, so with the zero-initialized
    // flags none of the PRT alternatives below is ever selected - confirm this is intended.
    GnmArrayMode arraymode = GNM_ARRAY_LINEAR_GENERAL;
    switch (surfacetype) {
    case GPA_SURFACE_COLORDISPLAY:
    case GPA_SURFACE_COLOR:
    case GPA_SURFACE_DEPTHSTENCIL:
    case GPA_SURFACE_DEPTH:
    case GPA_SURFACE_STENCIL:
    case GPA_SURFACE_FMASK:
        arraymode = flags.prt ? GNM_ARRAY_PRT_2D_TILED_THIN1 : GNM_ARRAY_2D_TILED_THIN1;
        break;
    case GPA_SURFACE_TEXTUREFLAT:
    case GPA_SURFACE_RWTEXTUREFLAT:
    case GPA_SURFACE_TEXTURECUBEMAP:
    case GPA_SURFACE_RWTEXTURECUBEMAP:
        /* MSAA requires 2D tiling. */
        if (flags.prt) {
            arraymode = numfrags > 1 ? GNM_ARRAY_PRT_2D_TILED_THIN1 : GNM_ARRAY_PRT_TILED_THIN1;
        } else {
            arraymode = numfrags > 1 ? GNM_ARRAY_2D_TILED_THIN1 : GNM_ARRAY_1D_TILED_THIN1;
        }
        break;
    case GPA_SURFACE_TEXTUREVOLUME:
    case GPA_SURFACE_RWTEXTUREVOLUME:
        arraymode = flags.prt ? GNM_ARRAY_PRT_TILED_THICK : GNM_ARRAY_1D_TILED_THICK;
        break;
    default:
        return GPA_ERR_INVALID_ARGS;
    }
    /* Set the micro tile type. */
    // NOTE(review): only THIN, DISPLAY and DEPTH are ever assigned here, so the THICK and
    // ROTATED branches further down are unreachable. Volume textures (1D_TILED_THICK array
    // mode) therefore take the THIN branch and end up with GNM_TM_THIN_1D_THIN - confirm
    // whether a thick tile mode was intended for them.
    GnmMicroTileMode microtilemode = GNM_SURF_THIN_MICRO_TILING;
    if (flags.display)
        microtilemode = GNM_SURF_DISPLAY_MICRO_TILING;
    else if (flags.depthtarget || flags.stenciltarget)
        microtilemode = GNM_SURF_DEPTH_MICRO_TILING;
    /* Find the tile mode type */
    GnmTileMode tilemode = GNM_TM_DEPTH_2D_THIN_64;
    if (microtilemode == GNM_SURF_DEPTH_MICRO_TILING) {
        // Bytes in one micro tile holding all fragments.
        const uint32_t tilesize = gpaGetMicroTileThickness(arraymode) * bpp * numfrags *
                                  MICROTILE_SIZE * MICROTILE_SIZE / 8;
        // A tile larger than DRAM_ROW_SIZE disables texture compatibility on NEO.
        if (mingpumode == GNM_GPU_NEO && DRAM_ROW_SIZE < tilesize) {
            flags.texcompatible = 0;
        }
        if (flags.depthtarget && flags.texcompatible) {
            // Texture-compatible depth: the tile split follows the tile size.
            switch (tilesize) {
            case 128:
                tilemode = GNM_TM_DEPTH_2D_THIN_128;
                break;
            case 256:
                tilemode = GNM_TM_DEPTH_2D_THIN_256;
                break;
            case 512:
                tilemode = GNM_TM_DEPTH_2D_THIN_512;
                break;
            default:
                tilemode = GNM_TM_DEPTH_2D_THIN_1K;
                break;
            }
        } else {
            // Otherwise the tile split follows the fragment count.
            switch (numfrags) {
            case 1:
                tilemode = GNM_TM_DEPTH_2D_THIN_64;
                break;
            case 2:
            case 4:
                tilemode = GNM_TM_DEPTH_2D_THIN_128;
                break;
            case 8:
                tilemode = GNM_TM_DEPTH_2D_THIN_256;
                break;
            default:
                return GPA_ERR_INVALID_ARGS;
            }
        }
        // 1D and PRT array modes override the 2D choice made above.
        switch (arraymode) {
        case GNM_ARRAY_1D_TILED_THIN1:
            tilemode = GNM_TM_DEPTH_1D_THIN;
            break;
        case GNM_ARRAY_PRT_TILED_THIN1:
            tilemode = GNM_TM_DEPTH_2D_THIN_PRT_256;
            break;
        default:
            break;
        }
        // Depth-only surfaces on NEO use at least the 256-byte split mode.
        if (flags.depthtarget && !flags.stenciltarget && mingpumode == GNM_GPU_NEO &&
            tilemode < GNM_TM_DEPTH_2D_THIN_256) {
            tilemode = GNM_TM_DEPTH_2D_THIN_256;
        }
    } else if (microtilemode == GNM_SURF_DISPLAY_MICRO_TILING) {
        if (arraymode == GNM_ARRAY_1D_TILED_THIN1) {
            tilemode = GNM_TM_DISPLAY_1D_THIN;
        } else if (arraymode == GNM_ARRAY_2D_TILED_THIN1) {
            tilemode = GNM_TM_DISPLAY_2D_THIN;
        } else if (arraymode == GNM_ARRAY_PRT_TILED_THIN1) {
            tilemode = GNM_TM_DISPLAY_THIN_PRT;
        } else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) {
            tilemode = GNM_TM_DISPLAY_2D_THIN_PRT;
        } else {
            tilemode = GNM_TM_DISPLAY_1D_THIN;
        }
    } else if (microtilemode == GNM_SURF_THIN_MICRO_TILING) {
        if (arraymode == GNM_ARRAY_1D_TILED_THIN1) {
            tilemode = GNM_TM_THIN_1D_THIN;
        } else if (arraymode == GNM_ARRAY_2D_TILED_THIN1) {
            tilemode = GNM_TM_THIN_2D_THIN;
        } else if (arraymode == GNM_ARRAY_3D_TILED_THIN1) {
            tilemode = GNM_TM_THIN_3D_THIN;
        } else if (arraymode == GNM_ARRAY_PRT_TILED_THIN1) {
            tilemode = GNM_TM_THIN_THIN_PRT;
        } else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) {
            tilemode = GNM_TM_THIN_2D_THIN_PRT;
        } else if (arraymode == GNM_ARRAY_PRT_3D_TILED_THIN1) {
            tilemode = GNM_TM_THIN_3D_THIN_PRT;
        } else {
            tilemode = GNM_TM_THIN_1D_THIN;
        }
    } else if (microtilemode == GNM_SURF_THICK_MICRO_TILING) {
        if (arraymode == GNM_ARRAY_1D_TILED_THICK) {
            tilemode = GNM_TM_THICK_1D_THICK;
        } else if (arraymode == GNM_ARRAY_2D_TILED_THICK) {
            tilemode = GNM_TM_THICK_2D_THICK;
        } else if (arraymode == GNM_ARRAY_3D_TILED_THICK) {
            tilemode = GNM_TM_THICK_3D_THICK;
        } else if (arraymode == GNM_ARRAY_PRT_TILED_THICK) {
            tilemode = GNM_TM_THICK_THICK_PRT;
        } else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THICK) {
            tilemode = GNM_TM_THICK_2D_THICK_PRT;
        } else if (arraymode == GNM_ARRAY_PRT_3D_TILED_THICK) {
            tilemode = GNM_TM_THICK_3D_THICK_PRT;
        } else if (arraymode == GNM_ARRAY_2D_TILED_XTHICK) {
            tilemode = GNM_TM_THICK_2D_XTHICK;
        } else if (arraymode == GNM_ARRAY_3D_TILED_XTHICK) {
            tilemode = GNM_TM_THICK_3D_XTHICK;
        } else {
            tilemode = GNM_TM_THICK_1D_THICK;
        }
    } else if (microtilemode == GNM_SURF_ROTATED_MICRO_TILING) {
        return GPA_ERR_INTERNAL_ERROR;
    }
    // Standard C++ brace initialization; the C99 compound literal form `(T){...}` is only a
    // compiler extension in C++.
    *outprops = GpaSurfaceProperties{
        .tilemode = tilemode,
        .flags = flags,
    };
    return GPA_ERR_OK;
}

View File

@ -0,0 +1,815 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include <cmath>
#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h"
// Returns the array mode that belongs to `tilemode`.
// Pure table lookup; calls abort() for a tile mode that is not listed.
GnmArrayMode gpaGetArrayMode(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_THIN_1D_THIN:
return GNM_ARRAY_1D_TILED_THIN1;
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_2D_THIN_128:
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_512:
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_THIN_2D_THIN:
return GNM_ARRAY_2D_TILED_THIN1;
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_THIN_THIN_PRT:
return GNM_ARRAY_PRT_TILED_THIN1;
case GNM_TM_DEPTH_2D_THIN_PRT_256:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_THIN_2D_THIN_PRT:
return GNM_ARRAY_PRT_2D_TILED_THIN1;
// NOTE(review): GNM_TM_THIN_3D_THIN_PRT maps to the non-PRT 3D array mode here, although
// GNM_ARRAY_PRT_3D_TILED_THIN1 exists - confirm this is intended.
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_3D_THIN_PRT:
return GNM_ARRAY_3D_TILED_THIN1;
case GNM_TM_THICK_1D_THICK:
return GNM_ARRAY_1D_TILED_THICK;
case GNM_TM_THICK_2D_THICK:
return GNM_ARRAY_2D_TILED_THICK;
case GNM_TM_THICK_3D_THICK:
return GNM_ARRAY_3D_TILED_THICK;
case GNM_TM_THICK_THICK_PRT:
return GNM_ARRAY_PRT_TILED_THICK;
case GNM_TM_THICK_2D_THICK_PRT:
return GNM_ARRAY_PRT_2D_TILED_THICK;
case GNM_TM_THICK_3D_THICK_PRT:
return GNM_ARRAY_PRT_3D_TILED_THICK;
case GNM_TM_THICK_2D_XTHICK:
return GNM_ARRAY_2D_TILED_XTHICK;
case GNM_TM_THICK_3D_XTHICK:
return GNM_ARRAY_3D_TILED_XTHICK;
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
return GNM_ARRAY_LINEAR_ALIGNED;
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_ARRAY_LINEAR_GENERAL;
default:
abort();
}
}
// Returns the micro tile mode (depth, display, thin or thick) that belongs to `tilemode`.
// Pure table lookup; calls abort() for a tile mode that is not listed.
GnmMicroTileMode gpaGetMicroTileMode(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_2D_THIN_128:
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_512:
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DEPTH_2D_THIN_PRT_256:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
return GNM_SURF_DEPTH_MICRO_TILING;
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_SURF_DISPLAY_MICRO_TILING;
case GNM_TM_THIN_1D_THIN:
case GNM_TM_THIN_2D_THIN:
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_THIN_PRT:
case GNM_TM_THIN_2D_THIN_PRT:
case GNM_TM_THIN_3D_THIN_PRT:
return GNM_SURF_THIN_MICRO_TILING;
case GNM_TM_THICK_1D_THICK:
case GNM_TM_THICK_2D_THICK:
case GNM_TM_THICK_3D_THICK:
case GNM_TM_THICK_THICK_PRT:
case GNM_TM_THICK_2D_THICK_PRT:
case GNM_TM_THICK_3D_THICK_PRT:
case GNM_TM_THICK_2D_XTHICK:
case GNM_TM_THICK_3D_XTHICK:
return GNM_SURF_THICK_MICRO_TILING;
default:
abort();
}
}
// Returns the pipe configuration for `tilemode`; gpaGetTileInfo uses this table when the GPU
// mode is not GNM_GPU_NEO.
// Pure table lookup; calls abort() for a tile mode that is not listed.
GnmPipeConfig gpaGetPipeConfig(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_2D_THIN_128:
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_512:
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DEPTH_2D_THIN_PRT_256:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_THIN_1D_THIN:
case GNM_TM_THIN_2D_THIN:
case GNM_TM_THIN_2D_THIN_PRT:
case GNM_TM_THIN_3D_THIN_PRT:
case GNM_TM_THICK_1D_THICK:
case GNM_TM_THICK_2D_THICK:
case GNM_TM_THICK_2D_THICK_PRT:
case GNM_TM_THICK_2D_XTHICK:
return GNM_ADDR_SURF_P8_32x32_16x16;
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_THIN_PRT:
case GNM_TM_THICK_3D_THICK:
case GNM_TM_THICK_THICK_PRT:
case GNM_TM_THICK_3D_THICK_PRT:
case GNM_TM_THICK_3D_XTHICK:
return GNM_ADDR_SURF_P8_32x32_8x16;
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_ADDR_SURF_P2;
default:
abort();
}
}
// Returns the pipe configuration for `tilemode`; gpaGetTileInfo uses this table for
// GNM_GPU_NEO. Every listed tile mode maps to the 16-pipe configuration except
// GNM_TM_DISPLAY_LINEAR_GENERAL, which maps to GNM_ADDR_SURF_P2.
// Pure table lookup; calls abort() for a tile mode that is not listed.
GnmPipeConfig gpaGetAltPipeConfig(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_2D_THIN_128:
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_512:
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DEPTH_2D_THIN_PRT_256:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_THIN_1D_THIN:
case GNM_TM_THIN_2D_THIN:
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_THIN_PRT:
case GNM_TM_THIN_2D_THIN_PRT:
case GNM_TM_THIN_3D_THIN_PRT:
case GNM_TM_THICK_1D_THICK:
case GNM_TM_THICK_2D_THICK:
case GNM_TM_THICK_3D_THICK:
case GNM_TM_THICK_THICK_PRT:
case GNM_TM_THICK_2D_THICK_PRT:
case GNM_TM_THICK_3D_THICK_PRT:
case GNM_TM_THICK_2D_XTHICK:
case GNM_TM_THICK_3D_XTHICK:
return GNM_ADDR_SURF_P16_32x32_8x16;
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_ADDR_SURF_P2;
default:
abort();
}
}
// Returns the sample split setting for `tilemode`: SPLIT_2 for the 2D/3D/PRT display and
// thin modes listed in the second group, SPLIT_1 for everything else that is listed.
// Pure table lookup; calls abort() for a tile mode that is not listed.
GnmSampleSplit gpaGetSampleSplit(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_2D_THIN_128:
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_512:
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DEPTH_2D_THIN_PRT_256:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_THIN_1D_THIN:
case GNM_TM_THICK_1D_THICK:
case GNM_TM_THICK_2D_THICK:
case GNM_TM_THICK_3D_THICK:
case GNM_TM_THICK_THICK_PRT:
case GNM_TM_THICK_2D_THICK_PRT:
case GNM_TM_THICK_3D_THICK_PRT:
case GNM_TM_THICK_2D_XTHICK:
case GNM_TM_THICK_3D_XTHICK:
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_ADDR_SAMPLE_SPLIT_1;
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_THIN_2D_THIN:
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_THIN_PRT:
case GNM_TM_THIN_2D_THIN_PRT:
case GNM_TM_THIN_3D_THIN_PRT:
return GNM_ADDR_SAMPLE_SPLIT_2;
default:
abort();
}
}
// Returns the tile split setting for `tilemode`. Only the 2D depth modes carry a split other
// than 64B; every other listed mode maps to GNM_SURF_TILE_SPLIT_64B.
// Pure table lookup; calls abort() for a tile mode that is not listed.
GnmTileSplit gpaGetTileSplit(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_THIN_1D_THIN:
case GNM_TM_THIN_2D_THIN:
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_THIN_PRT:
case GNM_TM_THIN_2D_THIN_PRT:
case GNM_TM_THIN_3D_THIN_PRT:
case GNM_TM_THICK_1D_THICK:
case GNM_TM_THICK_2D_THICK:
case GNM_TM_THICK_3D_THICK:
case GNM_TM_THICK_THICK_PRT:
case GNM_TM_THICK_2D_THICK_PRT:
case GNM_TM_THICK_3D_THICK_PRT:
case GNM_TM_THICK_2D_XTHICK:
case GNM_TM_THICK_3D_XTHICK:
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_SURF_TILE_SPLIT_64B;
case GNM_TM_DEPTH_2D_THIN_128:
return GNM_SURF_TILE_SPLIT_128B;
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_PRT_256:
return GNM_SURF_TILE_SPLIT_256B;
case GNM_TM_DEPTH_2D_THIN_512:
return GNM_SURF_TILE_SPLIT_512B;
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
return GNM_SURF_TILE_SPLIT_1KB;
default:
abort();
}
}
/// Computes the macro tile mode for a macro-tiled surface.
///
/// @param outmtm           Receives the macro tile mode. Must not be null.
/// @param tilemode         Tile mode of the surface; its array mode must be macro tiled.
/// @param bitsperelem      Bits per element, 1..128.
/// @param numfragsperpixel Fragments per pixel; must be a power of two and at most 16.
/// @return GPA_ERR_OK on success, GPA_ERR_INVALID_ARGS when any argument is rejected.
GpaError gpaCalcSurfaceMacrotileMode(GnmMacroTileMode* outmtm, GnmTileMode tilemode,
                                     uint32_t bitsperelem, uint32_t numfragsperpixel) {
    if (!outmtm) {
        return GPA_ERR_INVALID_ARGS;
    }
    if (!IsPow2(numfragsperpixel) || numfragsperpixel > 16) {
        return GPA_ERR_INVALID_ARGS;
    }
    if (bitsperelem < 1 || bitsperelem > 128) {
        return GPA_ERR_INVALID_ARGS;
    }
    const GnmArrayMode arraymode = gpaGetArrayMode(tilemode);
    if (!gpaIsMacroTiled(arraymode)) {
        return GPA_ERR_INVALID_ARGS;
    }
    const GnmMicroTileMode mtm = gpaGetMicroTileMode(tilemode);
    const GnmSampleSplit samplesplithw = gpaGetSampleSplit(tilemode);
    const GnmTileSplit tilesplithw = gpaGetTileSplit(tilemode);
    const uint32_t tilethickness = gpaGetMicroTileThickness(arraymode);
    // Bytes in one micro tile holding a single fragment per pixel.
    const uint32_t tilebytes1x = bitsperelem * MICROTILE_SIZE * MICROTILE_SIZE * tilethickness / 8;
    // The sample split enumerator is the log2 of the split factor.
    const uint32_t samplesplit = 1 << samplesplithw;
    const uint32_t colortilesplit = std::max(256U, samplesplit * tilebytes1x);
    // Depth modes encode the split directly in bytes (64 << n); the other modes derive it
    // from the sample split factor.
    const uint32_t tilesplit =
        (mtm == GNM_SURF_DEPTH_MICRO_TILING) ? (64u << tilesplithw) : colortilesplit;
    const uint32_t tilesplitclamped = std::min(DRAM_ROW_SIZE, tilesplit);
    const uint32_t tilebytes = std::min(tilesplitclamped, numfragsperpixel * tilebytes1x);
    // Integer floor(log2(tilebytes / 64)). Tiles smaller than 64 bytes (possible below 8 bits
    // per element) give a ratio of 0, which maps to index 0 here; the floating-point log2()
    // used previously produced -inf for that case, and converting -inf to an unsigned integer
    // is undefined behaviour.
    uint32_t mtmidx = 0;
    for (uint32_t ratio = tilebytes / 64; ratio > 1; ratio >>= 1) {
        ++mtmidx;
    }
    // PRT array modes use the second half of the macro tile mode table.
    *outmtm = static_cast<GnmMacroTileMode>(gpaIsPrt(arraymode) ? (mtmidx + 8) : mtmidx);
    return GPA_ERR_OK;
}
/// Finds the tile mode that combines the micro tile mode of `oldtilemode` with a different
/// array mode.
///
/// @param outtilemode  Receives the adjusted tile mode. Must not be null.
/// @param oldtilemode  Tile mode to start from.
/// @param newarraymode Array mode the result should have.
/// @return GPA_ERR_OK when a tile mode was written;
///         GPA_ERR_INVALID_ARGS for a null output pointer or a rotated/unknown micro tile mode;
///         GPA_ERR_TILING_ERROR when a depth mode is asked for anything but 1D thin;
///         GPA_ERR_UNSUPPORTED when no tile mode exists for the requested combination.
GpaError gpaAdjustTileMode(GnmTileMode* outtilemode, GnmTileMode oldtilemode,
                           GnmArrayMode newarraymode) {
    if (outtilemode == nullptr) {
        return GPA_ERR_INVALID_ARGS;
    }
    // Nothing to adjust when the array mode already matches.
    if (gpaGetArrayMode(oldtilemode) == newarraymode) {
        *outtilemode = oldtilemode;
        return GPA_ERR_OK;
    }
    switch (gpaGetMicroTileMode(oldtilemode)) {
    case GNM_SURF_DEPTH_MICRO_TILING:
        // Depth surfaces can only be moved to 1D thin.
        if (newarraymode == GNM_ARRAY_1D_TILED_THIN1) {
            *outtilemode = GNM_TM_DEPTH_1D_THIN;
            return GPA_ERR_OK;
        }
        return GPA_ERR_TILING_ERROR;
    case GNM_SURF_DISPLAY_MICRO_TILING:
        // Display surfaces can only be moved to 1D thin.
        if (newarraymode == GNM_ARRAY_1D_TILED_THIN1) {
            *outtilemode = GNM_TM_DISPLAY_1D_THIN;
            return GPA_ERR_OK;
        }
        return GPA_ERR_UNSUPPORTED;
    case GNM_SURF_THICK_MICRO_TILING:
    case GNM_SURF_THIN_MICRO_TILING:
        // Thick and thin surfaces share one mapping from array mode to tile mode.
        switch (newarraymode) {
        case GNM_ARRAY_3D_TILED_THICK:
            *outtilemode = GNM_TM_THICK_3D_THICK;
            break;
        case GNM_ARRAY_2D_TILED_THICK:
            *outtilemode = GNM_TM_THICK_2D_THICK;
            break;
        case GNM_ARRAY_1D_TILED_THICK:
            *outtilemode = GNM_TM_THICK_1D_THICK;
            break;
        case GNM_ARRAY_3D_TILED_THIN1:
            *outtilemode = GNM_TM_THIN_3D_THIN;
            break;
        case GNM_ARRAY_PRT_3D_TILED_THIN1:
            *outtilemode = GNM_TM_THIN_3D_THIN_PRT;
            break;
        case GNM_ARRAY_2D_TILED_THIN1:
            *outtilemode = GNM_TM_THIN_2D_THIN;
            break;
        case GNM_ARRAY_PRT_2D_TILED_THIN1:
            *outtilemode = GNM_TM_THIN_2D_THIN_PRT;
            break;
        case GNM_ARRAY_PRT_TILED_THIN1:
            *outtilemode = GNM_TM_THIN_THIN_PRT;
            break;
        case GNM_ARRAY_1D_TILED_THIN1:
            *outtilemode = GNM_TM_THIN_1D_THIN;
            break;
        default:
            return GPA_ERR_UNSUPPORTED;
        }
        return GPA_ERR_OK;
    case GNM_SURF_ROTATED_MICRO_TILING:
    default:
        return GPA_ERR_INVALID_ARGS;
    }
}
// Returns the micro tile thickness of an array mode: 1 for linear and THIN1 modes, 4 for THICK
// modes and 8 for XTHICK modes.
// Calls abort() for an array mode that is not listed.
uint32_t gpaGetMicroTileThickness(GnmArrayMode arraymode) {
switch (arraymode) {
case GNM_ARRAY_LINEAR_GENERAL:
case GNM_ARRAY_LINEAR_ALIGNED:
case GNM_ARRAY_1D_TILED_THIN1:
case GNM_ARRAY_2D_TILED_THIN1:
case GNM_ARRAY_PRT_TILED_THIN1:
case GNM_ARRAY_PRT_2D_TILED_THIN1:
case GNM_ARRAY_PRT_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THIN1:
return 1;
case GNM_ARRAY_1D_TILED_THICK:
case GNM_ARRAY_2D_TILED_THICK:
case GNM_ARRAY_3D_TILED_THICK:
case GNM_ARRAY_PRT_TILED_THICK:
case GNM_ARRAY_PRT_2D_TILED_THICK:
case GNM_ARRAY_PRT_3D_TILED_THICK:
return 4;
case GNM_ARRAY_2D_TILED_XTHICK:
case GNM_ARRAY_3D_TILED_XTHICK:
return 8;
default:
abort();
}
}
// Returns true for the two linear array modes and false for every tiled array mode.
// Calls abort() for an array mode that is not listed.
bool gpaIsLinear(GnmArrayMode arraymode) {
switch (arraymode) {
case GNM_ARRAY_LINEAR_GENERAL:
case GNM_ARRAY_LINEAR_ALIGNED:
return true;
case GNM_ARRAY_1D_TILED_THIN1:
case GNM_ARRAY_1D_TILED_THICK:
case GNM_ARRAY_2D_TILED_THIN1:
case GNM_ARRAY_PRT_TILED_THIN1:
case GNM_ARRAY_PRT_2D_TILED_THIN1:
case GNM_ARRAY_2D_TILED_THICK:
case GNM_ARRAY_2D_TILED_XTHICK:
case GNM_ARRAY_PRT_TILED_THICK:
case GNM_ARRAY_PRT_2D_TILED_THICK:
case GNM_ARRAY_PRT_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THICK:
case GNM_ARRAY_3D_TILED_XTHICK:
case GNM_ARRAY_PRT_3D_TILED_THICK:
return false;
default:
abort();
}
}
// Returns true only for the 1D tiled array modes (THIN1 and THICK); linear and all other
// tiled modes yield false.
// Calls abort() for an array mode that is not listed.
bool gpaIsMicroTiled(GnmArrayMode arraymode) {
switch (arraymode) {
case GNM_ARRAY_1D_TILED_THIN1:
case GNM_ARRAY_1D_TILED_THICK:
return true;
case GNM_ARRAY_LINEAR_GENERAL:
case GNM_ARRAY_LINEAR_ALIGNED:
case GNM_ARRAY_2D_TILED_THIN1:
case GNM_ARRAY_PRT_TILED_THIN1:
case GNM_ARRAY_PRT_2D_TILED_THIN1:
case GNM_ARRAY_2D_TILED_THICK:
case GNM_ARRAY_2D_TILED_XTHICK:
case GNM_ARRAY_PRT_TILED_THICK:
case GNM_ARRAY_PRT_2D_TILED_THICK:
case GNM_ARRAY_PRT_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THICK:
case GNM_ARRAY_3D_TILED_XTHICK:
case GNM_ARRAY_PRT_3D_TILED_THICK:
return false;
default:
abort();
}
}
// Returns false for the linear and 1D tiled array modes and true for every other listed
// (2D, 3D and PRT) array mode.
// Calls abort() for an array mode that is not listed.
bool gpaIsMacroTiled(GnmArrayMode arraymode) {
switch (arraymode) {
case GNM_ARRAY_LINEAR_GENERAL:
case GNM_ARRAY_LINEAR_ALIGNED:
case GNM_ARRAY_1D_TILED_THIN1:
case GNM_ARRAY_1D_TILED_THICK:
return false;
case GNM_ARRAY_2D_TILED_THIN1:
case GNM_ARRAY_PRT_TILED_THIN1:
case GNM_ARRAY_PRT_2D_TILED_THIN1:
case GNM_ARRAY_2D_TILED_THICK:
case GNM_ARRAY_2D_TILED_XTHICK:
case GNM_ARRAY_PRT_TILED_THICK:
case GNM_ARRAY_PRT_2D_TILED_THICK:
case GNM_ARRAY_PRT_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THICK:
case GNM_ARRAY_3D_TILED_XTHICK:
case GNM_ARRAY_PRT_3D_TILED_THICK:
return true;
default:
abort();
}
}
// Returns true only for the 3D tiled array modes (including their PRT variants).
// File-local helper; gpaComputeBaseSwizzle uses it to decide whether a pipe swizzle applies.
// Calls abort() for an array mode that is not listed.
static bool ismacrotiled3d(GnmArrayMode arraymode) {
switch (arraymode) {
case GNM_ARRAY_LINEAR_GENERAL:
case GNM_ARRAY_LINEAR_ALIGNED:
case GNM_ARRAY_1D_TILED_THIN1:
case GNM_ARRAY_1D_TILED_THICK:
case GNM_ARRAY_2D_TILED_THIN1:
case GNM_ARRAY_PRT_TILED_THIN1:
case GNM_ARRAY_PRT_2D_TILED_THIN1:
case GNM_ARRAY_2D_TILED_THICK:
case GNM_ARRAY_2D_TILED_XTHICK:
case GNM_ARRAY_PRT_TILED_THICK:
case GNM_ARRAY_PRT_2D_TILED_THICK:
return false;
case GNM_ARRAY_PRT_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THICK:
case GNM_ARRAY_3D_TILED_XTHICK:
case GNM_ARRAY_PRT_3D_TILED_THICK:
return true;
default:
abort();
}
}
// Returns true for the six GNM_ARRAY_PRT_* array modes and false for all other listed modes.
// Calls abort() for an array mode that is not listed.
bool gpaIsPrt(GnmArrayMode arraymode) {
switch (arraymode) {
case GNM_ARRAY_PRT_TILED_THIN1:
case GNM_ARRAY_PRT_TILED_THICK:
case GNM_ARRAY_PRT_2D_TILED_THIN1:
case GNM_ARRAY_PRT_2D_TILED_THICK:
case GNM_ARRAY_PRT_3D_TILED_THIN1:
case GNM_ARRAY_PRT_3D_TILED_THICK:
return true;
case GNM_ARRAY_LINEAR_GENERAL:
case GNM_ARRAY_LINEAR_ALIGNED:
case GNM_ARRAY_1D_TILED_THIN1:
case GNM_ARRAY_1D_TILED_THICK:
case GNM_ARRAY_2D_TILED_THIN1:
case GNM_ARRAY_2D_TILED_THICK:
case GNM_ARRAY_2D_TILED_XTHICK:
case GNM_ARRAY_3D_TILED_THIN1:
case GNM_ARRAY_3D_TILED_THICK:
case GNM_ARRAY_3D_TILED_XTHICK:
return false;
default:
abort();
}
}
//
// BASE mode macrotilemode stuff
//
// Returns the bank width for a macro tile mode. Every listed macro tile mode maps to
// GNM_SURF_BANK_WIDTH_1; gpaGetTileInfo uses this table for both GPU modes.
// Calls abort() for a macro tile mode that is not listed.
GnmBankWidth gpaGetBankWidth(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_BANK_WIDTH_1;
default:
abort();
}
}
// Returns the bank height for a macro tile mode; gpaGetTileInfo uses this table when the GPU
// mode is not GNM_GPU_NEO.
// Calls abort() for a macro tile mode that is not listed.
GnmBankHeight gpaGetBankHeight(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_BANK_HEIGHT_1;
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x2_16_DUP:
return GNM_SURF_BANK_HEIGHT_2;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_BANK_HEIGHT_4;
case GNM_MACROTILEMODE_1x8_16:
return GNM_SURF_BANK_HEIGHT_8;
default:
abort();
}
}
// Returns the bank count setting for a macro tile mode; gpaGetTileInfo uses this table when
// the GPU mode is not GNM_GPU_NEO.
// Calls abort() for a macro tile mode that is not listed.
GnmNumBanks gpaGetNumBanks(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_2_BANK;
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_4_DUP:
return GNM_SURF_4_BANK;
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_8_DUP:
return GNM_SURF_8_BANK;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
return GNM_SURF_16_BANK;
default:
abort();
}
}
// Returns the macro tile aspect setting for a macro tile mode; gpaGetTileInfo uses this table
// when the GPU mode is not GNM_GPU_NEO.
// Calls abort() for a macro tile mode that is not listed.
GnmMacroTileAspect gpaGetMacrotileAspect(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_MACRO_ASPECT_1;
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
return GNM_SURF_MACRO_ASPECT_2;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_MACRO_ASPECT_4;
default:
abort();
}
}
//
// NEO mode macrotilemode stuff
//
// Returns the bank height for a macro tile mode; gpaGetTileInfo uses this table for
// GNM_GPU_NEO. The mapping differs from gpaGetBankHeight and never returns
// GNM_SURF_BANK_HEIGHT_8.
// Calls abort() for a macro tile mode that is not listed.
GnmBankHeight gpaGetAltBankHeight(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_BANK_HEIGHT_1;
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
return GNM_SURF_BANK_HEIGHT_2;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_BANK_HEIGHT_4;
default:
abort();
}
}
// Returns the bank count setting for a macro tile mode; gpaGetTileInfo uses this table for
// GNM_GPU_NEO. The mapping differs from gpaGetNumBanks (for example only
// GNM_MACROTILEMODE_1x8_16 yields 16 banks here).
// Calls abort() for a macro tile mode that is not listed.
GnmNumBanks gpaGetAltNumBanks(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_2_BANK;
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
return GNM_SURF_4_BANK;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x4_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
return GNM_SURF_8_BANK;
case GNM_MACROTILEMODE_1x8_16:
return GNM_SURF_16_BANK;
default:
abort();
}
}
// Returns the macro tile aspect setting for a macro tile mode; gpaGetTileInfo uses this table
// for GNM_GPU_NEO. The mapping differs from gpaGetMacrotileAspect and never returns
// GNM_SURF_MACRO_ASPECT_4.
// Calls abort() for a macro tile mode that is not listed.
GnmMacroTileAspect gpaGetAltMacrotileAspect(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_MACRO_ASPECT_1;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_MACRO_ASPECT_2;
default:
abort();
}
}
// Returns the number of pipes (2, 8 or 16) described by a pipe configuration.
// Calls abort() for a pipe configuration that is not listed.
uint32_t gpaGetPipeCount(GnmPipeConfig pipecfg) {
switch (pipecfg) {
case GNM_ADDR_SURF_P2:
return 2;
case GNM_ADDR_SURF_P8_32x32_8x16:
case GNM_ADDR_SURF_P8_32x32_16x16:
return 8;
case GNM_ADDR_SURF_P16_32x32_8x16:
return 16;
default:
abort();
}
}
/// Collects the tiling parameters (array mode, banks, bank width/height, macro aspect ratio,
/// tile split and pipe configuration) for a tile mode.
///
/// @param outinfo  Receives the tile info. Must not be null.
/// @param tilemode Tile mode; must lie between GNM_TM_DEPTH_2D_THIN_64 and
///                 GNM_TM_DISPLAY_LINEAR_GENERAL inclusive.
/// @param bpp      Bits per element; only used for macro-tiled modes.
/// @param numfrags Fragments per pixel; only used for macro-tiled modes.
/// @param gpumode  GNM_GPU_NEO selects the "Alt" lookup tables, anything else the base ones.
/// @return GPA_ERR_OK on success, GPA_ERR_INVALID_ARGS for bad arguments, or the error
///         reported by gpaCalcSurfaceMacrotileMode.
GpaError gpaGetTileInfo(GpaTileInfo* outinfo, GnmTileMode tilemode, uint32_t bpp, uint32_t numfrags,
                        GnmGpuMode gpumode) {
    if (!outinfo || tilemode < GNM_TM_DEPTH_2D_THIN_64 ||
        tilemode > GNM_TM_DISPLAY_LINEAR_GENERAL) {
        return GPA_ERR_INVALID_ARGS;
    }
    const GnmArrayMode arraymode = gpaGetArrayMode(tilemode);
    // Defaults reported for tile modes that are not macro tiled.
    GnmNumBanks banks = GNM_SURF_2_BANK;
    GnmBankWidth bankw = GNM_SURF_BANK_WIDTH_1;
    GnmBankHeight bankh = GNM_SURF_BANK_HEIGHT_1;
    GnmMacroTileAspect macroaspect = GNM_SURF_MACRO_ASPECT_1;
    const GnmTileSplit tilesplit = gpaGetTileSplit(tilemode);
    const GnmPipeConfig pipeconfig =
        gpumode == GNM_GPU_NEO ? gpaGetAltPipeConfig(tilemode) : gpaGetPipeConfig(tilemode);
    if (gpaIsMacroTiled(arraymode)) {
        GnmMacroTileMode macrotilemode = GNM_MACROTILEMODE_1x1_2;
        GpaError err = gpaCalcSurfaceMacrotileMode(&macrotilemode, tilemode, bpp, numfrags);
        if (err != GPA_ERR_OK) {
            return err;
        }
        if (gpumode == GNM_GPU_NEO) {
            banks = gpaGetAltNumBanks(macrotilemode);
            bankh = gpaGetAltBankHeight(macrotilemode);
            macroaspect = gpaGetAltMacrotileAspect(macrotilemode);
        } else {
            banks = gpaGetNumBanks(macrotilemode);
            bankh = gpaGetBankHeight(macrotilemode);
            macroaspect = gpaGetMacrotileAspect(macrotilemode);
        }
        // The bank width table is shared by both GPU modes.
        bankw = gpaGetBankWidth(macrotilemode);
    }
    // Standard C++ brace initialization; the C99 compound literal form `(T){...}` is only a
    // compiler extension in C++.
    *outinfo = GpaTileInfo{
        .arraymode = arraymode,
        .banks = banks,
        .bankwidth = bankw,
        .bankheight = bankh,
        .macroaspectratio = macroaspect,
        .tilesplit = tilesplit,
        .pipeconfig = pipeconfig,
    };
    return GPA_ERR_OK;
}
/// Packs a bank swizzle and a pipe swizzle into one value and folds it into a base address.
///
/// @param bankSwizzle Bank swizzle; placed above the pipe bits.
/// @param pipeSwizzle Pipe swizzle; occupies the low bits.
/// @param baseAddr    Address the scaled swizzle is XORed into.
/// @param tileinfo    Supplies the pipe configuration that determines the pipe bit count.
/// @return The low 32 bits of `(baseAddr ^ swizzle * PIPE_INTERLEAVE_BYTES) >> 8`.
static uint32_t GetBankPipeSwizzle(uint32_t bankSwizzle, uint32_t pipeSwizzle, uint64_t baseAddr,
                                   const GpaTileInfo* tileinfo) {
    const uint32_t pipe_count = gpaGetPipeCount(tileinfo->pipeconfig);
    const uint32_t pipe_shift = QLog2(pipe_count);
    const uint32_t interleave_shift = QLog2(BANK_INTERLEAVE);
    // The bank swizzle sits above the bank-interleave and pipe bits.
    const uint32_t shifted_bank = (bankSwizzle << interleave_shift) << pipe_shift;
    const uint32_t combined_swizzle = pipeSwizzle + shifted_bank;
    const uint64_t swizzled_addr = baseAddr ^ (combined_swizzle * PIPE_INTERLEAVE_BYTES);
    // Drop the low 8 bits (256 = PIPE_INTERLEAVE_BYTES) and truncate to 32 bits.
    return static_cast<uint32_t>(swizzled_addr >> 8);
}
/// Computes the base bank/pipe swizzle for the surface with index `surfindex`.
///
/// @param outswizzle Receives the swizzle; 0 for tile modes that are not macro tiled.
///                   Must not be null.
/// @param tilemode   Tile mode of the surface.
/// @param surfindex  Surface index used to pick the bank rotation and pipe swizzle.
/// @param bpp        Bits per element, forwarded to gpaGetTileInfo.
/// @param numfrags   Fragments per pixel, forwarded to gpaGetTileInfo.
/// @param gpumode    GPU mode, forwarded to gpaGetTileInfo.
/// @return GPA_ERR_OK on success, GPA_ERR_INVALID_ARGS for a null output pointer, or the error
///         reported by gpaGetTileInfo.
GpaError gpaComputeBaseSwizzle(uint32_t* outswizzle, GnmTileMode tilemode, uint32_t surfindex,
                               uint32_t bpp, uint32_t numfrags, GnmGpuMode gpumode) {
    if (outswizzle == nullptr) {
        return GPA_ERR_INVALID_ARGS;
    }
    GpaTileInfo info = {};
    const GpaError status = gpaGetTileInfo(&info, tilemode, bpp, numfrags, gpumode);
    if (status != GPA_ERR_OK) {
        return status;
    }
    // Only macro-tiled surfaces carry a swizzle.
    if (!gpaIsMacroTiled(info.arraymode)) {
        *outswizzle = 0;
        return GPA_ERR_OK;
    }
    /// This is a legacy misreading of h/w doc, use it as it doesn't hurt.
    // One row per bank-count setting, indexed by the surface index modulo the bank count.
    static constexpr uint8_t kBankRotation[4][16] = {
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},       // ADDR_SURF_2_BANK
        {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},       // ADDR_SURF_4_BANK
        {0, 3, 6, 1, 4, 7, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0},       // ADDR_SURF_8_BANK
        {0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9}, // ADDR_SURF_16_BANK
    };
    const uint32_t bank_count = 2 << info.banks;
    const uint32_t pipe_count = gpaGetPipeCount(info.pipeconfig);
    const uint32_t bank_slot = surfindex & (bank_count - 1);
    const uint32_t bank_swizzle = kBankRotation[info.banks][bank_slot];
    // A pipe swizzle is only applied to 3D macro-tiled array modes.
    const uint32_t pipe_swizzle = ismacrotiled3d(info.arraymode) ? (surfindex & (pipe_count - 1)) : 0;
    *outswizzle = GetBankPipeSwizzle(bank_swizzle, pipe_swizzle, 0, &info);
    return GPA_ERR_OK;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,375 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdbool>
#include "common/types.h"
#include "video_core/amdgpu/gpuaddr/dataformat.h"
/// Sample split setting. The enumerator name carries the split count
/// (1, 2, 4, 8); the stored value is log2 of that count (0x0..0x3).
enum GnmSampleSplit {
GNM_ADDR_SAMPLE_SPLIT_1 = 0x0,
GNM_ADDR_SAMPLE_SPLIT_2 = 0x1,
GNM_ADDR_SAMPLE_SPLIT_4 = 0x2,
GNM_ADDR_SAMPLE_SPLIT_8 = 0x3,
};
/// Micro tiling kind (display, thin, depth, rotated, thick), values 0x0..0x4.
enum GnmMicroTileMode {
GNM_SURF_DISPLAY_MICRO_TILING = 0x0,
GNM_SURF_THIN_MICRO_TILING = 0x1,
GNM_SURF_DEPTH_MICRO_TILING = 0x2,
GNM_SURF_ROTATED_MICRO_TILING = 0x3,
GNM_SURF_THICK_MICRO_TILING = 0x4,
};
/// Macro tile mode index, values 0x0..0xf.
/// NOTE(review): the name pattern looks like <bank width>x<bank height>_<bank count>;
/// confirm against the macro tile mode lookup tables.
/// The _DUP/_DUP2/_DUP3 suffixes only keep enumerator names unique where
/// several indices share the same name stem.
enum GnmMacroTileMode {
GNM_MACROTILEMODE_1x4_16 = 0x0,
GNM_MACROTILEMODE_1x2_16 = 0x1,
GNM_MACROTILEMODE_1x1_16 = 0x2,
GNM_MACROTILEMODE_1x1_16_DUP = 0x3,
GNM_MACROTILEMODE_1x1_8 = 0x4,
GNM_MACROTILEMODE_1x1_4 = 0x5,
GNM_MACROTILEMODE_1x1_2 = 0x6,
GNM_MACROTILEMODE_1x1_2_DUP = 0x7,
GNM_MACROTILEMODE_1x8_16 = 0x8,
GNM_MACROTILEMODE_1x4_16_DUP = 0x9,
GNM_MACROTILEMODE_1x2_16_DUP = 0xa,
GNM_MACROTILEMODE_1x1_16_DUP2 = 0xb,
GNM_MACROTILEMODE_1x1_8_DUP = 0xc,
GNM_MACROTILEMODE_1x1_4_DUP = 0xd,
GNM_MACROTILEMODE_1x1_2_DUP2 = 0xe,
GNM_MACROTILEMODE_1x1_2_DUP3 = 0xf,
};
/// Tile mode index of a surface.
/// Values 0x0..0x1a are assigned contiguously; 0x1b..0x1e have no enumerator
/// here, and 0x1f is GNM_TM_DISPLAY_LINEAR_GENERAL.
enum GnmTileMode {
// Depth tile modes.
GNM_TM_DEPTH_2D_THIN_64 = 0x0,
GNM_TM_DEPTH_2D_THIN_128 = 0x1,
GNM_TM_DEPTH_2D_THIN_256 = 0x2,
GNM_TM_DEPTH_2D_THIN_512 = 0x3,
GNM_TM_DEPTH_2D_THIN_1K = 0x4,
GNM_TM_DEPTH_1D_THIN = 0x5,
GNM_TM_DEPTH_2D_THIN_PRT_256 = 0x6,
GNM_TM_DEPTH_2D_THIN_PRT_1K = 0x7,
// Display tile modes.
GNM_TM_DISPLAY_LINEAR_ALIGNED = 0x8,
GNM_TM_DISPLAY_1D_THIN = 0x9,
GNM_TM_DISPLAY_2D_THIN = 0xa,
GNM_TM_DISPLAY_THIN_PRT = 0xb,
GNM_TM_DISPLAY_2D_THIN_PRT = 0xc,
// Thin tile modes.
GNM_TM_THIN_1D_THIN = 0xd,
GNM_TM_THIN_2D_THIN = 0xe,
GNM_TM_THIN_3D_THIN = 0xf,
GNM_TM_THIN_THIN_PRT = 0x10,
GNM_TM_THIN_2D_THIN_PRT = 0x11,
GNM_TM_THIN_3D_THIN_PRT = 0x12,
// Thick tile modes.
GNM_TM_THICK_1D_THICK = 0x13,
GNM_TM_THICK_2D_THICK = 0x14,
GNM_TM_THICK_3D_THICK = 0x15,
GNM_TM_THICK_THICK_PRT = 0x16,
GNM_TM_THICK_2D_THICK_PRT = 0x17,
GNM_TM_THICK_3D_THICK_PRT = 0x18,
GNM_TM_THICK_2D_XTHICK = 0x19,
GNM_TM_THICK_3D_XTHICK = 0x1a,
// Not contiguous with the entries above.
GNM_TM_DISPLAY_LINEAR_GENERAL = 0x1f,
};
/// Array mode of a surface, values 0x0..0xf: linear, 1D tiled, 2D tiled,
/// 3D tiled and PRT variants in thin/thick/xthick flavours.
/// Stored in GpaTileInfo::arraymode.
enum GnmArrayMode {
GNM_ARRAY_LINEAR_GENERAL = 0x0,
GNM_ARRAY_LINEAR_ALIGNED = 0x1,
GNM_ARRAY_1D_TILED_THIN1 = 0x2,
GNM_ARRAY_1D_TILED_THICK = 0x3,
GNM_ARRAY_2D_TILED_THIN1 = 0x4,
GNM_ARRAY_PRT_TILED_THIN1 = 0x5,
GNM_ARRAY_PRT_2D_TILED_THIN1 = 0x6,
GNM_ARRAY_2D_TILED_THICK = 0x7,
GNM_ARRAY_2D_TILED_XTHICK = 0x8,
GNM_ARRAY_PRT_TILED_THICK = 0x9,
GNM_ARRAY_PRT_2D_TILED_THICK = 0xa,
GNM_ARRAY_PRT_3D_TILED_THIN1 = 0xb,
GNM_ARRAY_3D_TILED_THIN1 = 0xc,
GNM_ARRAY_3D_TILED_THICK = 0xd,
GNM_ARRAY_3D_TILED_XTHICK = 0xe,
GNM_ARRAY_PRT_3D_TILED_THICK = 0xf,
};
/// Bank count code. The enumerator name carries the bank count (2, 4, 8, 16);
/// the stored value N satisfies count == 2 << N.
enum GnmNumBanks {
GNM_SURF_2_BANK = 0x0,
GNM_SURF_4_BANK = 0x1,
GNM_SURF_8_BANK = 0x2,
GNM_SURF_16_BANK = 0x3,
};
/// GPU mode selector: base (0x0) or NEO (0x1).
enum GnmGpuMode {
GNM_GPU_BASE = 0x0,
GNM_GPU_NEO = 0x1,
};
/// Bank width code. The enumerator name carries the width (1, 2, 4, 8);
/// the stored value is log2 of that width (0x0..0x3).
enum GnmBankWidth {
GNM_SURF_BANK_WIDTH_1 = 0x0,
GNM_SURF_BANK_WIDTH_2 = 0x1,
GNM_SURF_BANK_WIDTH_4 = 0x2,
GNM_SURF_BANK_WIDTH_8 = 0x3,
};
/// Bank height code. The enumerator name carries the height (1, 2, 4, 8);
/// the stored value is log2 of that height (0x0..0x3).
enum GnmBankHeight {
GNM_SURF_BANK_HEIGHT_1 = 0x0,
GNM_SURF_BANK_HEIGHT_2 = 0x1,
GNM_SURF_BANK_HEIGHT_4 = 0x2,
GNM_SURF_BANK_HEIGHT_8 = 0x3,
};
/// Pipe configuration code. Values are sparse: 0x0, 0x4..0xe and 0x10..0x11;
/// 0x1..0x3 and 0xf have no enumerator here.
/// NOTE(review): the P2/P4/P8/P16 name prefix presumably gives the pipe count;
/// confirm against gpaGetPipeCount().
enum GnmPipeConfig {
GNM_ADDR_SURF_P2 = 0x0,
GNM_ADDR_SURF_P4_8x16 = 0x4,
GNM_ADDR_SURF_P4_16x16 = 0x5,
GNM_ADDR_SURF_P4_16x32 = 0x6,
GNM_ADDR_SURF_P4_32x32 = 0x7,
GNM_ADDR_SURF_P8_16x16_8x16 = 0x8,
GNM_ADDR_SURF_P8_16x32_8x16 = 0x9,
GNM_ADDR_SURF_P8_32x32_8x16 = 0xa,
GNM_ADDR_SURF_P8_16x32_16x16 = 0xb,
GNM_ADDR_SURF_P8_32x32_16x16 = 0xc,
GNM_ADDR_SURF_P8_32x32_16x32 = 0xd,
GNM_ADDR_SURF_P8_32x64_32x32 = 0xe,
GNM_ADDR_SURF_P16_32x32_8x16 = 0x10,
GNM_ADDR_SURF_P16_32x32_16x16 = 0x11,
};
/// Macro tile aspect ratio code. The enumerator name carries the ratio
/// (1, 2, 4, 8); the stored value is log2 of that ratio (0x0..0x3).
enum GnmMacroTileAspect {
GNM_SURF_MACRO_ASPECT_1 = 0x0,
GNM_SURF_MACRO_ASPECT_2 = 0x1,
GNM_SURF_MACRO_ASPECT_4 = 0x2,
GNM_SURF_MACRO_ASPECT_8 = 0x3,
};
/// Tile split size code. The enumerator name carries the size (64 B .. 4 KB);
/// the stored value N satisfies size_in_bytes == 64 << N (0x0..0x6).
enum GnmTileSplit {
GNM_SURF_TILE_SPLIT_64B = 0x0,
GNM_SURF_TILE_SPLIT_128B = 0x1,
GNM_SURF_TILE_SPLIT_256B = 0x2,
GNM_SURF_TILE_SPLIT_512B = 0x3,
GNM_SURF_TILE_SPLIT_1KB = 0x4,
GNM_SURF_TILE_SPLIT_2KB = 0x5,
GNM_SURF_TILE_SPLIT_4KB = 0x6,
};
/// Kind of surface (colour, depth/stencil, fmask, texture variants).
/// No explicit values are given, so enumerators number from 0 in declaration
/// order; reordering them changes their numeric values.
enum GpaSurfaceType {
GPA_SURFACE_COLORDISPLAY,
GPA_SURFACE_COLOR,
GPA_SURFACE_DEPTHSTENCIL,
GPA_SURFACE_DEPTH,
GPA_SURFACE_STENCIL,
GPA_SURFACE_FMASK,
GPA_SURFACE_TEXTUREFLAT,
GPA_SURFACE_TEXTUREVOLUME,
GPA_SURFACE_TEXTURECUBEMAP,
GPA_SURFACE_RWTEXTUREFLAT,
GPA_SURFACE_RWTEXTUREVOLUME,
GPA_SURFACE_RWTEXTURECUBEMAP,
};
/// Surface usage/property flags packed into one 32-bit word:
/// thirteen 1-bit flags followed by 19 padding bits.
struct GpaSurfaceFlags {
u32 colortarget : 1;
u32 depthtarget : 1;
u32 stenciltarget : 1;
u32 texture : 1;
u32 cube : 1;
u32 volume : 1;
u32 fmask : 1;
u32 cubeasarray : 1;
u32 overlay : 1;
u32 display : 1;
u32 prt : 1;
u32 pow2pad : 1;
u32 texcompatible : 1;
// Padding that brings the bitfield total to 32 bits.
u32 _unused : 19;
};
// Guards the packing: the flags must occupy exactly 4 bytes.
static_assert(sizeof(GpaSurfaceFlags) == 0x4, "");
/// Pairs a tile mode with the surface flags.
struct GpaSurfaceProperties {
GnmTileMode tilemode;
GpaSurfaceFlags flags;
};
/// Surface description for HTILE computations.
/// NOTE(review): presumably the input to an HTILE info routine declared
/// elsewhere; confirm against gpuaddr.h.
struct GpaHtileParams {
u32 pitch;
u32 height;
u32 numslices;
u32 numfrags;
u32 bpp;
GnmArrayMode arraymode;
GnmNumBanks banks;
GnmPipeConfig pipeconfig;
GnmGpuMode mingpumode;
// Option bits; only `tccompatible` is defined, the other 31 bits are reserved.
struct {
u32 tccompatible : 1;
u32 reserved : 31;
} flags;
};
/// Surface description for CMASK computations.
/// NOTE(review): presumably the input to a CMASK info routine declared
/// elsewhere; confirm against gpuaddr.h.
struct GpaCmaskParams {
u32 pitch;
u32 height;
u32 numslices;
u32 numfrags;
u32 bpp;
GnmTileMode tilemode;
GnmGpuMode mingpumode;
// Option bits; only `tccompatible` is defined, the other 31 bits are reserved.
struct {
u32 tccompatible : 1;
u32 reserved : 31;
} flags;
};
/// Surface description for FMASK computations.
/// NOTE(review): presumably the input to an FMASK info routine declared
/// elsewhere; confirm against gpuaddr.h.
struct GpaFmaskParams {
u32 pitch;
u32 height;
u32 numslices;
u32 numfrags;
u32 bpp;
GnmTileMode tilemode;
GnmGpuMode mingpumode;
bool isblockcompressed;
};
/// Resolved tiling configuration: array mode, bank layout, macro tile aspect,
/// tile split and pipe configuration. Filled in by gpaGetTileInfo().
struct GpaTileInfo {
GnmArrayMode arraymode;
GnmNumBanks banks;
GnmBankWidth bankwidth;
GnmBankHeight bankheight;
GnmMacroTileAspect macroaspectratio;
GnmTileSplit tilesplit;
GnmPipeConfig pipeconfig;
};
/// Computed surface layout: dimensions, total size, alignment requirements,
/// element/block geometry and the tiling that was applied.
/// Written by gpaComputeSurfaceInfo().
struct GpaSurfaceInfo {
u32 pitch;
u32 height;
u32 depth;
// Size of the surface. NOTE(review): presumably in bytes — confirm.
uint64_t surfacesize;
u32 basealign;
u32 pitchalign;
u32 heightalign;
u32 depthalign;
u32 bitsperelem;
u32 blockwidth;
u32 blockheight;
GnmTileMode tilemode;
GpaTileInfo tileinfo;
// Anonymous struct: the bitfields below are accessed directly as members of
// GpaSurfaceInfo. NOTE(review): anonymous structs are a compiler extension in
// C++ (only anonymous unions are standard).
struct {
u32 istexcompatible : 1;
u32 _unused : 31;
};
};
/// Computed HTILE layout: dimensions, base alignment, macro tile size and
/// the total and per-slice sizes.
/// NOTE(review): `htilebytes`/`slicebytes` are presumably byte counts, as the
/// names suggest — confirm against the producer.
struct GpaHtileInfo {
u32 pitch;
u32 height;
u32 basealign;
u32 bpp;
u32 macrowidth;
u32 macroheight;
uint64_t htilebytes;
uint64_t slicebytes;
};
/// Computed CMASK layout: dimensions, base alignment, macro tile size,
/// block maximum and the total and per-slice sizes.
/// NOTE(review): `cmaskbytes`/`slicebytes` are presumably byte counts, as the
/// names suggest — confirm against the producer.
struct GpaCmaskInfo {
u32 pitch;
u32 height;
u32 basealign;
u32 bpp;
u32 macrowidth;
u32 macroheight;
u32 blockmax;
uint64_t cmaskbytes;
uint64_t slicebytes;
};
/// Computed FMASK layout: dimensions, alignment requirements and the total
/// and per-slice sizes.
/// NOTE(review): `fmaskbytes`/`slicebytes` are presumably byte counts, as the
/// names suggest — confirm against the producer.
struct GpaFmaskInfo {
u32 pitch;
u32 height;
u32 basealign;
u32 pitchalign;
u32 heightalign;
u32 bpp;
uint64_t fmaskbytes;
uint64_t slicebytes;
};
/// Identifies one sub-resource/sample position within a surface by array
/// index, face, mip level, depth, fragment and sample.
struct GpaSurfaceIndex {
u32 arrayindex;
u32 face;
u32 mip;
u32 depth;
u32 fragment;
u32 sample;
};
/// Tiling parameters for one mip level / array slice of a surface.
/// Initialised from a GpaTextureInfo by gpaTpInit() and consumed by
/// gpaComputeSurfaceInfo().
struct GpaTilingParams {
GnmTileMode tilemode;
GnmGpuMode mingpumode;
// Dimensions of the level being described.
u32 linearwidth;
u32 linearheight;
u32 lineardepth;
u32 numfragsperpixel;
u32 basetiledpitch;
u32 miplevel;
u32 arrayslice;
GpaSurfaceFlags surfaceflags;
u32 bitsperfrag;
bool isblockcompressed;
};
/// Axis-aligned box within a surface, given by its low (left/top/front) and
/// high (right/bottom/back) bounds on X, Y and Z.
/// NOTE(review): whether the high bounds are inclusive or exclusive is not
/// visible here — confirm against the users of this struct.
struct GpaSurfaceRegion {
u32 left; // -X
u32 top; // -Y
u32 front; // -Z
u32 right; // +X
u32 bottom; // +Y
u32 back; // +Z
};
/// Texture dimensionality/type, values 0x8..0xf (1D, 2D, 3D, cubemap, array
/// and MSAA variants). Values below 0x8 have no enumerator here.
enum GnmTextureType {
GNM_TEXTURE_1D = 0x8,
GNM_TEXTURE_2D = 0x9,
GNM_TEXTURE_3D = 0xa,
GNM_TEXTURE_CUBEMAP = 0xb,
GNM_TEXTURE_1D_ARRAY = 0xc,
GNM_TEXTURE_2D_ARRAY = 0xd,
GNM_TEXTURE_2D_MSAA = 0xe,
GNM_TEXTURE_2D_ARRAY_MSAA = 0xf,
};
/// Full description of a texture: type, data format, dimensions, fragment,
/// mip and slice counts, tile mode and GPU mode.
/// This is the input to gpaTpInit(), gpaCalcSurfaceSizeOffset() and
/// gpaDetileTextureAll().
struct GpaTextureInfo {
GnmTextureType type;
GnmDataFormat fmt;
u32 width;
u32 height;
u32 pitch;
u32 depth;
u32 numfrags;
u32 nummips;
u32 numslices;
// Tile mode of the texture data.
GnmTileMode tm;
GnmGpuMode mingpumode;
bool pow2pad;
};

View File

@ -10,11 +10,9 @@
#include "video_core/amdgpu/pixel_format.h"
#include <array>
#include <condition_variable>
#include <coroutine>
#include <functional>
#include <future>
#include <span>
#include <mutex>
#include <thread>
#include <queue>

View File

@ -7,6 +7,7 @@
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/gpuaddr/gpuaddr.h"
namespace AmdGpu {
@ -132,10 +133,21 @@ struct Image {
}
u32 NumLayers() const {
return last_array - base_array + 1;
u32 slices = type == ImageType::Color3D ? 1 : depth.Value() + 1;
if (type == ImageType::Cube) {
slices *= 6;
}
if (pow2pad) {
slices = std::bit_ceil(slices);
}
return slices;
}
/// Returns the number of mip levels of the image.
/// The two MSAA image types always report a single level; every other type
/// reports `last_level + 1`.
u32 NumLevels() const {
    const bool is_msaa =
        type == ImageType::Color2DMsaa || type == ImageType::Color2DMsaaArray;
    if (!is_msaa) {
        return last_level + 1;
    }
    return 1;
}
@ -155,9 +167,29 @@ struct Image {
return GetTilingMode() != TilingMode::Display_Linear;
}
[[nodiscard]] size_t GetSizeAligned() const {
// TODO: Derive this properly from tiling params
return (width + 1) * (height + 1) * NumComponents(GetDataFmt());
[[nodiscard]] size_t GetSizeAligned(const GpaTextureInfo& texinfo) const {
GpaTilingParams tp = {};
GpaError err = gpaTpInit(&tp, &texinfo, 0, 0);
ASSERT(err == GPA_ERR_OK);
GpaSurfaceInfo surfinfo = {};
size_t size = {};
for (uint32_t i = 0; i < NumLevels(); i += 1) {
tp.linearwidth = std::max(texinfo.width >> i, 1U);
tp.linearheight = std::max(texinfo.height >> i, 1U);
tp.lineardepth = std::max(texinfo.depth >> i, 1U);
tp.miplevel = i;
err = gpaComputeSurfaceInfo(&surfinfo, &tp);
ASSERT(err == GPA_ERR_OK);
size += NumLayers() * surfinfo.surfacesize;
if (tp.linearwidth == 1 && tp.linearheight == 1 &&
tp.lineardepth == 1) {
break;
}
}
return size;
}
};

View File

@ -291,7 +291,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eB8G8R8A8Srgb;
return vk::Format::eR8G8B8A8Srgb;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
@ -324,7 +324,8 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc3UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Uint) {
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR8G8B8A8Uint;
}
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Float) {

View File

@ -64,7 +64,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
};
if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) {
LOG_WARNING(Render_Vulkan, "Rectangle List primitive type is only supported for embedded VS");
LOG_WARNING(Render_Vulkan,
"Rectangle List primitive type is only supported for embedded VS");
}
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {

View File

@ -41,9 +41,11 @@ Instance::Instance(bool enable_validation, bool dump_command_buffers)
physical_devices{instance->enumeratePhysicalDevices()} {}
Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index)
: instance{CreateInstance(dl, window.getWindowInfo().type, true, false)},
: enable_validation{false}, instance{CreateInstance(dl, window.getWindowInfo().type, enable_validation, false)},
physical_devices{instance->enumeratePhysicalDevices()} {
if (enable_validation) {
debug_callback = CreateDebugCallback(*instance);
}
const std::size_t num_physical_devices = static_cast<u16>(physical_devices.size());
ASSERT_MSG(num_physical_devices > 0, "No physical devices found");

View File

@ -194,6 +194,7 @@ private:
private:
vk::DynamicLoader dl;
bool enable_validation{};
vk::UniqueInstance instance;
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;

View File

@ -112,7 +112,8 @@ void PipelineCache::RefreshGraphicsKey() {
key.color_formats[remapped_cb] =
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
key.blend_controls[remapped_cb] = regs.blend_control[cb];
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass);
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
!col_buf.info.blend_bypass);
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
++remapped_cb;

View File

@ -60,7 +60,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
});
}
if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) {
const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
const auto& image_view =
texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
@ -91,7 +92,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList ? 4 : regs.num_indices;
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList
? 4
: regs.num_indices;
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering();

View File

@ -9,8 +9,8 @@
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/tile_manager.h"
#include <vulkan/vulkan_format_traits.hpp>
#include <vk_mem_alloc.h>
#include <vulkan/vulkan_format_traits.hpp>
namespace VideoCore {
@ -41,7 +41,8 @@ static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
if (format == vk::Format::eD32SfloatS8Uint || format == vk::Format::eD32Sfloat) {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
if (format != vk::Format::eBc3SrgbBlock && format != vk::Format::eBc3UnormBlock && format != vk::Format::eBc1RgbaUnormBlock) {
if (format != vk::Format::eBc3SrgbBlock && format != vk::Format::eBc3UnormBlock &&
format != vk::Format::eBc1RgbaUnormBlock) {
usage |= vk::ImageUsageFlagBits::eColorAttachment;
}
}
@ -54,10 +55,10 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
return vk::ImageType::e1D;
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Color1DArray:
case AmdGpu::ImageType::Color2DArray:
case AmdGpu::ImageType::Cube:
return vk::ImageType::e2D;
case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Color2DArray:
return vk::ImageType::e3D;
default:
UNREACHABLE();
@ -121,7 +122,28 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
pitch = image.Pitch();
resources.levels = image.NumLevels();
resources.layers = image.NumLayers();
guest_size_bytes = image.GetSizeAligned();
texinfo = GpaTextureInfo{
.type = static_cast<GnmTextureType>(image.type.Value()),
.fmt = {
.surfacefmt = static_cast<GnmImageFormat>(image.data_format.Value()),
.chantype = static_cast<GnmImgNumFormat>(image.num_format.Value()),
.chanx = static_cast<GnmChannel>(image.dst_sel_x.Value()),
.chany = static_cast<GnmChannel>(image.dst_sel_y.Value()),
.chanz = static_cast<GnmChannel>(image.dst_sel_z.Value()),
.chanw = static_cast<GnmChannel>(image.dst_sel_w.Value()),
},
.width = static_cast<u32>(image.width.Value() + 1),
.height = static_cast<u32>(image.height.Value() + 1),
.pitch = image.Pitch(),
.depth = 1,
.numfrags = 1,
.nummips = image.NumLevels(),
.numslices = image.NumLayers(),
.tm = static_cast<GnmTileMode>(image.tiling_index.Value()),
.mingpumode = GNM_GPU_BASE,
.pow2pad = bool(image.pow2pad.Value()),
};
guest_size_bytes = image.GetSizeAligned(texinfo);
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
@ -165,21 +187,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
if (info.type == vk::ImageType::e3D) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}
if (info.is_tiled) {
flags |= vk::ImageCreateFlagBits::eExtendedUsage;
if (false) { // IsBlockCodedFormat()
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
}
}
if (info.pixel_format == vk::Format::eR16Sscaled) {
info.is_tiled = false;
flags = {};
}
info.usage = ImageUsageFlags(info.pixel_format);
if ((info.is_tiled && (info.pixel_format != vk::Format::eBc3UnormBlock) && info.pixel_format != vk::Format::eBc1RgbaSrgbBlock) || info.is_storage) {
info.usage |= vk::ImageUsageFlagBits::eStorage;
}
if (info.pixel_format == vk::Format::eD32Sfloat) {
aspect_mask = vk::ImageAspectFlagBits::eDepth;
}
@ -221,7 +230,8 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
return;
}
const vk::ImageMemoryBarrier barrier = {.srcAccessMask = access_mask,
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = access_mask,
.dstAccessMask = dst_mask,
.oldLayout = layout,
.newLayout = dst_layout,
@ -234,10 +244,11 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}};
},
};
// Adjust pipeline stage
vk::PipelineStageFlagBits dst_pl_stage = (dst_mask == vk::AccessFlagBits::eTransferRead ||
const vk::PipelineStageFlagBits dst_pl_stage = (dst_mask == vk::AccessFlagBits::eTransferRead ||
dst_mask == vk::AccessFlagBits::eTransferWrite)
? vk::PipelineStageFlagBits::eTransfer
: vk::PipelineStageFlagBits::eAllGraphics;

View File

@ -8,6 +8,7 @@
#include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/gpuaddr/gpuaddr.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h"
@ -52,6 +53,7 @@ struct ImageInfo {
u32 pitch = 0;
u32 guest_size_bytes = 0;
AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear};
GpaTextureInfo texinfo{};
};
struct UniqueImage {

View File

@ -3,8 +3,8 @@
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/image_view.h"
namespace VideoCore {

View File

@ -4,6 +4,7 @@
#include <xxhash.h>
#include "common/assert.h"
#include "common/config.h"
#include "common/error.h"
#include "core/virtual_memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -23,7 +24,7 @@
void mprotect(void* addr, size_t len, int prot) {
DWORD old_prot{};
BOOL result = VirtualProtect(addr, len, prot, &old_prot);
ASSERT_MSG(result != 0, "Region protection failed");
ASSERT_MSG(result != 0, "Region protection failed {}", Common::GetLastErrorMsg());
}
#endif
@ -153,8 +154,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
usage_override = image.info.usage & ~vk::ImageUsageFlagBits::eStorage;
}
const ImageViewId view_id =
slot_image_views.insert(instance, view_info, image, usage_override);
const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, usage_override);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];
@ -193,82 +193,59 @@ void TextureCache::RefreshImage(Image& image) {
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
{
if (!tile_manager.TryDetile(image)) {
// Upload data to the staging buffer.
const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
std::memcpy(data, image_data, image.info.guest_size_bytes);
const auto [staging_data, offset, _] = staging.Map(image.info.guest_size_bytes, 16);
if (image.info.texinfo.tm == GnmTileMode::GNM_TM_DISPLAY_LINEAR_GENERAL) {
std::memcpy(staging_data, image_data, image.info.guest_size_bytes);
} else {
const GpaError res = gpaDetileTextureAll(image_data, image.info.guest_size_bytes, staging_data,
image.info.guest_size_bytes, &image.info.texinfo);
ASSERT_MSG(res == GPA_ERR_OK, "Texture detiling failed with error: {}", gpaStrError(res));
}
staging.Commit(image.info.guest_size_bytes);
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
// The mipmaps of each slice are next to each other in memory. So we iterate each layer
// and detile its mipmaps. Vulkan allows us to copy to the same mipmap of multiple layers at
// once, so we try to upload in that order.
boost::container::small_vector<vk::BufferImageCopy, 50> image_copies;
for (u32 mip = 0; mip < image.info.resources.levels; mip++) {
// Initialize tiling parameters.
GpaTilingParams tp = {};
GpaError res = gpaTpInit(&tp, &image.info.texinfo, mip, 0);
ASSERT(res == GPA_ERR_OK);
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
// Figure out the offset of the slice0 mip in the image data and its size.
u64 surfoffset = 0;
u64 surfsize = 0;
res = gpaCalcSurfaceSizeOffset(&surfsize, &surfoffset, &image.info.texinfo, mip, 0);
ASSERT(res == GPA_ERR_OK);
// Add a new buffer copy for later.
image_copies.push_back({
.bufferOffset = offset + surfoffset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {image.info.size.width, image.info.size.height, 1},
};
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
vk::ImageLayout::eTransferDstOptimal, image_copy);
}
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
return;
}
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
for (u32 m = 0; m < image.info.resources.levels; m++) {
const u32 width = image.info.size.width >> m;
const u32 height = image.info.size.height >> m;
const u32 map_size = width * height * image.info.resources.layers;
// Upload data to the staging buffer.
const auto [data, offset, _] = staging.Map(map_size, 16);
if (image.info.is_tiled) {
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
} else {
std::memcpy(data, image_data, map_size);
}
staging.Commit(map_size);
image_data += map_size;
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = m,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = u32(image.info.resources.layers),
},
.imageOffset = {0, 0, 0},
.imageExtent = {width, height, 1},
};
.imageExtent = {image.info.size.width >> mip, image.info.size.height >> mip, 1},
});
}
// Perform copy.
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
vk::ImageLayout::eTransferDstOptimal, image_copy);
vk::ImageLayout::eTransferDstOptimal, image_copies);
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
}
}
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
const u64 hash = XXH3_64bits(&sampler, sizeof(sampler));

View File

@ -37,7 +37,8 @@ public:
void OnCpuWrite(VAddr address);
/// Retrieves the image handle of the image with the provided attributes and address.
[[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create = true);
[[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address,
bool refresh_on_create = true);
/// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image);