Compare commits

...

3 Commits

Author SHA1 Message Date
raphaelthegreat c7bf29cbac spirv_image: Add dref instruction support 2024-06-07 03:23:34 +03:00
raphaelthegreat f291674790 amdgpu: Add freegnm detiler 2024-06-07 02:14:52 +03:00
raphaelthegreat 6cec16225d video_core: Various gpu fixes 2024-06-06 19:08:40 +03:00
46 changed files with 5800 additions and 154 deletions

View File

@ -377,6 +377,18 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/amdgpu/pm4_cmds.h
src/video_core/amdgpu/pm4_opcodes.h
src/video_core/amdgpu/resource.h
src/video_core/amdgpu/gpuaddr/dataformat.cpp
src/video_core/amdgpu/gpuaddr/dataformat.h
src/video_core/amdgpu/gpuaddr/element.cpp
src/video_core/amdgpu/gpuaddr/error.cpp
src/video_core/amdgpu/gpuaddr/error.h
src/video_core/amdgpu/gpuaddr/gpuaddr.h
src/video_core/amdgpu/gpuaddr/gpuaddr_private.h
src/video_core/amdgpu/gpuaddr/surface.cpp
src/video_core/amdgpu/gpuaddr/surfgen.cpp
src/video_core/amdgpu/gpuaddr/tilemodes.cpp
src/video_core/amdgpu/gpuaddr/tiler.cpp
src/video_core/amdgpu/gpuaddr/types.h
src/video_core/renderer_vulkan/liverpool_to_vk.cpp
src/video_core/renderer_vulkan/liverpool_to_vk.h
src/video_core/renderer_vulkan/renderer_vulkan.cpp

View File

@ -184,8 +184,8 @@ void IOFile::Open(const fs::path& path, FileAccessMode mode, FileType type, File
if (!IsOpen()) {
const auto ec = std::error_code{errno, std::generic_category()};
LOG_ERROR(Common_Filesystem, "Failed to open the file at path={}, ec_message={}",
PathToUTF8String(file_path), ec.message());
// LOG_ERROR(Common_Filesystem, "Failed to open the file at path={}, ec_message={}",
// PathToUTF8String(file_path), ec.message());
}
}

View File

@ -189,7 +189,6 @@ public:
} else {
ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
}
std::fflush(stdout);
}
private:

View File

@ -12,7 +12,7 @@
namespace Libraries::Kernel {
int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
// LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
@ -170,7 +170,7 @@ int PS4_SYSV_ABI sceKernelMkdir(const char* path, u16 mode) {
}
int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
// LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
std::string path_name = mnt->GetHostFile(path);
memset(sb, 0, sizeof(OrbisKernelStat));
@ -198,7 +198,18 @@ int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
int PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) {
int result = sceKernelStat(path, sb);
if (result < 0) {
UNREACHABLE(); // TODO
return result;
// UNREACHABLE(); // TODO
}
return ORBIS_OK;
}
/// Reports whether the guest path resolves to an existing host file.
///
/// @param path Guest path; it is translated to a host path through the mount table.
/// @return ORBIS_OK when the translated host path exists, SCE_KERNEL_ERROR_ENOENT otherwise.
int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) {
    // LOG_INFO(Lib_Kernel, "path = {}", path);
    auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
    const std::string host_path = mnt->GetHostFile(path);
    // Use the non-throwing overload: a host I/O error must not unwind through
    // a guest-ABI entry point. On error exists() yields false, so the caller
    // simply sees the path as unreachable.
    std::error_code ec;
    if (!std::filesystem::exists(host_path, ec)) {
        return SCE_KERNEL_ERROR_ENOENT;
    }
    return ORBIS_OK;
}
@ -216,6 +227,7 @@ void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("1-LFLmRFxxM", "libkernel", 1, "libkernel", 1, 1, sceKernelMkdir);
LIB_FUNCTION("eV9wAD2riIA", "libkernel", 1, "libkernel", 1, 1, sceKernelStat);
LIB_FUNCTION("E6ao34wPw+U", "libScePosix", 1, "libkernel", 1, 1, posix_stat);
LIB_FUNCTION("uWyW3v98sU4", "libkernel", 1, "libkernel", 1, 1, sceKernelCheckReachability);
// OpenOrbis (to check whether it is valid outside of OpenOrbis)
LIB_FUNCTION("6c3rCVE-fTU", "libkernel", 1, "libkernel", 1, 1,

View File

@ -3,6 +3,7 @@
#include <mutex>
#include <thread>
#include <semaphore.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/singleton.h"
@ -392,7 +393,7 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut
int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr);
if (name != nullptr) {
LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
// LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
}
switch (result) {
@ -417,7 +418,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) {
int result = pthread_mutex_destroy(&(*mutex)->pth_mutex);
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
// LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
delete *mutex;
*mutex = nullptr;
@ -1036,6 +1037,206 @@ void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
return linker->TlsGetAddr(index->ti_module, index->ti_offset);
}
/// Destroys the host condition variable wrapped by a guest cond handle.
///
/// @param cond Pointer to the guest handle; both the pointer and the handle
///             it refers to must be non-null.
/// @return SCE_OK on success, SCE_KERNEL_ERROR_EBUSY when the host reports
///         EBUSY, SCE_KERNEL_ERROR_EINVAL for a null argument or any other
///         host error.
int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
    // The handle itself is dereferenced below, so a null handle has to be
    // rejected as well as a null pointer to it.
    if (cond == nullptr || *cond == nullptr) {
        return SCE_KERNEL_ERROR_EINVAL;
    }
    // NOTE(review): the wrapper object behind *cond is not released here,
    // unlike scePthreadMutexDestroy which deletes its handle — confirm whether
    // scePthreadCondInit allocates it and who owns it.
    int result = pthread_cond_destroy(&(*cond)->cond);

    LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);

    switch (result) {
    case 0:
        return SCE_OK;
    case EBUSY:
        return SCE_KERNEL_ERROR_EBUSY;
    default:
        return SCE_KERNEL_ERROR_EINVAL;
    }
}
/// POSIX entry point that forwards to scePthreadCondattrInit.
/// A negative result is not translated; it trips UNREACHABLE().
int PS4_SYSV_ABI posix_pthread_condattr_init(ScePthreadCondattr* attr) {
    const int rc = scePthreadCondattrInit(attr);
    LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondattrInit: result = {}", rc);
    const bool failed = rc < 0;
    if (failed) {
        UNREACHABLE();
    }
    return rc;
}
/// POSIX entry point that forwards to scePthreadCondInit with an empty name.
/// A negative result is not translated; it trips UNREACHABLE().
int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadCondattr* attr) {
    const int rc = scePthreadCondInit(cond, attr, "");
    LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondInit: result = {}", rc);
    const bool failed = rc < 0;
    if (failed) {
        UNREACHABLE();
    }
    return rc;
}
/// POSIX entry point that forwards to scePthreadCondattrDestroy.
/// A negative result is not translated; it trips UNREACHABLE().
int PS4_SYSV_ABI posix_pthread_condattr_destroy(ScePthreadCondattr* attr) {
    const int rc = scePthreadCondattrDestroy(attr);
    LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondattrDestroy: result = {}", rc);
    const bool failed = rc < 0;
    if (failed) {
        UNREACHABLE();
    }
    return rc;
}
/// POSIX entry point that forwards to scePthreadAttrInit.
/// Non-negative results pass through unchanged. A negative result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_attr_init(ScePthreadAttr* attr) {
    const int rc = scePthreadAttrInit(attr);
    if (rc >= 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// POSIX entry point that forwards to scePthreadAttrSetstacksize.
/// Non-negative results pass through unchanged. A negative result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_attr_setstacksize(ScePthreadAttr* attr, size_t stacksize) {
    const int rc = scePthreadAttrSetstacksize(attr, stacksize);
    if (rc >= 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// POSIX entry point that forwards to scePthreadAttrSetdetachstate.
/// Non-negative results pass through unchanged. A negative result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int detachstate) {
    const int rc = scePthreadAttrSetdetachstate(attr, detachstate);
    if (rc >= 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// POSIX entry point that forwards to scePthreadMutexattrInit.
/// Non-negative results pass through unchanged. A negative result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
    const int rc = scePthreadMutexattrInit(attr);
    if (rc >= 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// POSIX entry point that forwards to scePthreadMutexattrSettype.
/// Non-negative results pass through unchanged. A negative result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) {
    const int rc = scePthreadMutexattrSettype(attr, type);
    if (rc >= 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// POSIX entry point that forwards to scePthreadMutexattrDestroy.
/// A negative result is not translated; it trips UNREACHABLE().
int PS4_SYSV_ABI posix_pthread_mutexattr_destroy(ScePthreadMutexattr* attr) {
    const int rc = scePthreadMutexattrDestroy(attr);
    const bool failed = rc < 0;
    if (failed) {
        UNREACHABLE();
    }
    return rc;
}
/// POSIX entry point that forwards to scePthreadMutexattrSetprotocol.
/// A negative result is not translated; it trips UNREACHABLE().
int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) {
    const int rc = scePthreadMutexattrSetprotocol(attr, protocol);
    LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", rc);
    const bool failed = rc < 0;
    if (failed) {
        UNREACHABLE();
    }
    return rc;
}
/// Queries the stack address and size stored in the host attribute object.
///
/// @param attr Guest attribute handle wrapping the host pthread_attr_t.
/// @param addr Receives the stack address reported by the host.
/// @param size Receives the stack size reported by the host.
/// @return SCE_OK when the host call succeeds, SCE_KERNEL_ERROR_EINVAL otherwise.
int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_t* size) {
    const int rc = pthread_attr_getstack(&(*attr)->pth_attr, addr, size);
    LOG_INFO(Kernel_Pthread, "scePthreadAttrGetstack: result = {}", rc);
    if (rc != 0) {
        return SCE_KERNEL_ERROR_EINVAL;
    }
    return SCE_OK;
}
/// POSIX entry point that forwards to scePthreadAttrDestroy.
/// Non-negative results pass through unchanged. A negative result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_attr_destroy(ScePthreadAttr* attr) {
    const int rc = scePthreadAttrDestroy(attr);
    if (rc >= 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// POSIX entry point that forwards to scePthreadCreate, naming the thread
/// "PS4_Thread".
/// A zero result passes through. Any other result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_create(ScePthread* thread, const ScePthreadAttr* attr,
                                      pthreadEntryFunc start_routine, void* arg) {
    LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate");
    const int rc = scePthreadCreate(thread, attr, start_routine, arg, "PS4_Thread");
    if (rc == 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// Marks the guest thread object as detached.
/// Only the is_detached flag is written; no host call is made from here.
/// @return Always ORBIS_OK.
int PS4_SYSV_ABI posix_pthread_detach(ScePthread thread) {
    const auto& thread_name = thread->name;
    LOG_INFO(Kernel_Pthread, "thread detach name = {}", thread_name);
    thread->is_detached = true;
    return ORBIS_OK;
}
/// Thin pass-through to the host sem_init(); the host result is returned as-is.
int PS4_SYSV_ABI posix_sem_init(sem_t* sem, int pshared, unsigned int value) {
    const int rc = sem_init(sem, pshared, value);
    return rc;
}
/// Thin pass-through to the host sem_wait(); the host result is returned as-is.
int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) {
    const int rc = sem_wait(sem);
    return rc;
}
/// Thin pass-through to the host sem_post(); the host result is returned as-is.
int PS4_SYSV_ABI posix_sem_post(sem_t* sem) {
    const int rc = sem_post(sem);
    return rc;
}
/// POSIX entry point that forwards to scePthreadMutexDestroy.
/// Non-negative results pass through unchanged. A negative result inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window is rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN; anything else becomes POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) {
    const int rc = scePthreadMutexDestroy(mutex);
    if (rc >= 0) {
        return rc;
    }
    const bool in_window = rc > SCE_KERNEL_ERROR_UNKNOWN && rc <= SCE_KERNEL_ERROR_ESTOP;
    const int translated = in_window ? rc + -SCE_KERNEL_ERROR_UNKNOWN : POSIX_EOTHER;
    return translated;
}
/// Joins the host thread stored in the guest thread object and returns the
/// host pthread_join() result unchanged.
int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** value_ptr) {
    const int rc = pthread_join(thread->pth, value_ptr);
    return rc;
}
/// POSIX entry point that forwards to scePthreadMutexTrylock.
///
/// A trylock that loses the race is an ordinary outcome, not a fatal
/// condition, so a negative result is translated the same way the sibling
/// posix_* wrappers do instead of aborting: values inside the
/// (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP] window are rebased by
/// subtracting SCE_KERNEL_ERROR_UNKNOWN, anything else becomes POSIX_EOTHER.
///
/// @param mutex Guest mutex handle.
/// @return The non-negative result of scePthreadMutexTrylock, or the
///         translated error code.
int PS4_SYSV_ABI posix_pthread_mutex_trylock(ScePthreadMutex* mutex) {
    // NOTE(review): assumes scePthreadMutexTrylock reports a busy mutex as a
    // negative SCE_KERNEL_ERROR_* value, as scePthreadCondDestroy does for
    // EBUSY — confirm against its definition.
    int result = scePthreadMutexTrylock(mutex);
    if (result < 0) {
        int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
                     ? result + -SCE_KERNEL_ERROR_UNKNOWN
                     : POSIX_EOTHER;
        return rt;
    }
    return result;
}
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy);
LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate);
@ -1048,6 +1249,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSelf);
LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, pthread_self);
LIB_FUNCTION("EotR8a3ASf4", "libScePosix", 1, "libkernel", 1, 1, pthread_self);
LIB_FUNCTION("3qxgM4ezETA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetaffinity);
LIB_FUNCTION("8+s5BzZjxSg", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetaffinity);
LIB_FUNCTION("x1X76arYMxU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGet);
@ -1091,6 +1293,36 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("7H0iTOciTLo", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
LIB_FUNCTION("2Z+PpY6CaJg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
LIB_FUNCTION("mkx2fVhNMsg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack);
LIB_FUNCTION("+U1R4WtXvoc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_detach);
LIB_FUNCTION("g+PZd2hiacg", "libkernel", 1, "libkernel", 1, 1, scePthreadCondDestroy);
// posix calls
LIB_FUNCTION("wtkt-teR1so", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_init);
LIB_FUNCTION("2Q0z6rnBrTE", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_attr_setstacksize);
LIB_FUNCTION("E+tyo3lp5Lw", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_attr_setdetachstate);
LIB_FUNCTION("OxhIB8LB-PQ", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create);
LIB_FUNCTION("zHchY8ft5pk", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_destroy);
LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
LIB_FUNCTION("mKoTx03HRWA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_condattr_init);
LIB_FUNCTION("dJcuQVn6-Iw", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_condattr_destroy);
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_mutexattr_settype);
LIB_FUNCTION("5txKfcMUAok", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_mutexattr_setprotocol);
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("K-jXhbt2gn4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_trylock);
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init);
LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait);
LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post);
LIB_FUNCTION("HF7lK46xzjY", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_mutexattr_destroy);
}
} // namespace Libraries::Kernel

View File

@ -1,10 +1,16 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
#include <pthread.h>
#include "common/native_clock.h"
#include "core/libraries/kernel/time_management.h"
#include "core/libraries/libs.h"
#ifdef _WIN64
#include <windows.h>
#endif
namespace Libraries::Kernel {
static u64 initial_ptc;
@ -30,6 +36,100 @@ u64 PS4_SYSV_ABI sceKernelReadTsc() {
return clock->GetUptime();
}
/// Blocks the calling host thread for at least the given number of microseconds.
/// @return Always 0.
int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) {
    const std::chrono::microseconds pause{microseconds};
    std::this_thread::sleep_for(pause);
    return 0;
}
/// POSIX-named variant: blocks the calling host thread for at least the given
/// number of microseconds.
/// @return Always 0.
int PS4_SYSV_ABI posix_usleep(u32 microseconds) {
    const std::chrono::microseconds pause{microseconds};
    std::this_thread::sleep_for(pause);
    return 0;
}
/// Blocks the calling host thread for at least the given number of seconds.
/// @return Always 0.
u32 PS4_SYSV_ABI sceKernelSleep(u32 seconds) {
    const std::chrono::seconds pause{seconds};
    std::this_thread::sleep_for(pause);
    return 0;
}
/* Offset between the FILETIME epoch (1/1/1601) and the Unix epoch (1/1/1970),
 * expressed in 100 ns intervals (the unit FILETIME counts in), not in seconds. */
#define FILETIME_1970 116444736000000000ull /* 100 ns intervals between 1/1/1601 and 1/1/1970 */
/* Number of 100 ns intervals in one second. */
#define HECTONANOSEC_PER_SEC 10000000ull
// Seconds/microseconds pair.
// NOTE(review): not referenced by any function in the visible part of this
// file — confirm where it is consumed.
struct SceKernelTimeval {
time_t tv_sec;   // whole seconds
s64 tv_usec;     // microseconds part
};
// Time zone description filled in by getntptimeofday() when the caller
// passes a non-null pointer.
struct timezone {
int tz_minuteswest; /* minutes W of Greenwich */
int tz_dsttime; /* type of dst correction */
};
// Seconds/microseconds pair written by gettimeofday() into guest memory.
// Both fields are explicitly 64-bit: `long` is only 32 bits wide on Windows
// hosts, which would shrink the structure to half the size of the
// neighbouring SceKernelTimeval and leave the guest reading truncated values.
struct timeval {
    long long tv_sec;  // whole seconds since the Unix epoch
    long long tv_usec; // microseconds part
};
// Fills *tp with the current wall-clock time relative to 1 Jan 1970 and *z
// with the host time zone bias and DST flag. Either pointer may be null, in
// which case that output is skipped. Always returns 0 (res is never changed).
//
// NOTE(review): the body uses Win32 types and calls (FILETIME,
// GetTimeZoneInformation, GetProcAddress) and the GCC/Clang __atomic builtins;
// in the visible lines only the <windows.h> include is guarded by _WIN64 —
// confirm how this builds on other hosts and compilers.
int PS4_SYSV_ABI getntptimeofday(struct timespec* tp, struct timezone* z) {
int res = 0;
// Lets the FILETIME returned by the system be read as one 64-bit counter.
union {
unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */
FILETIME ft;
} _now;
TIME_ZONE_INFORMATION TimeZoneInformation;
DWORD tzi;
if (z != NULL) {
if ((tzi = GetTimeZoneInformation(&TimeZoneInformation)) != TIME_ZONE_ID_INVALID) {
z->tz_minuteswest = TimeZoneInformation.Bias;
if (tzi == TIME_ZONE_ID_DAYLIGHT)
z->tz_dsttime = 1;
else
z->tz_dsttime = 0;
} else {
// Time zone query failed: report zero bias and no DST.
z->tz_minuteswest = 0;
z->tz_dsttime = 0;
}
}
if (tp != NULL) {
typedef void(WINAPI * GetSystemTimeAsFileTime_t)(LPFILETIME);
// Cached time source, resolved on the first call and reused afterwards.
static GetSystemTimeAsFileTime_t GetSystemTimeAsFileTime_p /* = 0 */;
/* Set function pointer during first call */
GetSystemTimeAsFileTime_t get_time =
__atomic_load_n(&GetSystemTimeAsFileTime_p, __ATOMIC_RELAXED);
if (get_time == NULL) {
/* Use GetSystemTimePreciseAsFileTime() if available (Windows 8 or later) */
// NOTE(review): GetModuleHandle is given a narrow string literal —
// presumably this relies on a non-UNICODE build; confirm.
get_time = (GetSystemTimeAsFileTime_t)(intptr_t)GetProcAddress(
GetModuleHandle("kernel32.dll"),
"GetSystemTimePreciseAsFileTime"); /* <1us precision on Windows 10 */
if (get_time == NULL)
get_time = GetSystemTimeAsFileTime; /* >15ms precision on Windows 10 */
__atomic_store_n(&GetSystemTimeAsFileTime_p, get_time, __ATOMIC_RELAXED);
}
get_time(&_now.ft); /* 100 nano-seconds since 1-1-1601 */
_now.ns100 -= FILETIME_1970; /* 100 nano-seconds since 1-1-1970 */
tp->tv_sec = _now.ns100 / HECTONANOSEC_PER_SEC; /* seconds since 1-1-1970 */
tp->tv_nsec = (long)(_now.ns100 % HECTONANOSEC_PER_SEC) * 100; /* nanoseconds */
}
return res;
}
/// Reports the current wall-clock time with microsecond resolution.
///
/// @param p Receives seconds and microseconds since 1 Jan 1970; may be null,
///          in which case only the time zone is reported.
/// @param z Receives the time zone information; may be null.
/// @return 0 on success, -1 when getntptimeofday() reports a failure.
int PS4_SYSV_ABI gettimeofday(struct timeval* p, struct timezone* z) {
    struct timespec now;

    if (getntptimeofday(&now, z) != 0) {
        return -1;
    }
    // getntptimeofday() tolerates null outputs, so do the same here rather
    // than dereferencing a null time pointer.
    if (p != nullptr) {
        p->tv_sec = now.tv_sec;
        p->tv_usec = (now.tv_nsec / 1000);
    }
    return 0;
}
/// Thin pass-through to the host nanosleep(); the host result is returned as-is.
int PS4_SYSV_ABI posix_nanosleep(timespec* requested_time, timespec* remaining) {
    const int rc = nanosleep(requested_time, remaining);
    return rc;
}
void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
clock = std::make_unique<Common::NativeClock>();
initial_ptc = clock->GetUptime();
@ -39,6 +139,13 @@ void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
sceKernelGetProcessTimeCounterFrequency);
LIB_FUNCTION("-2IRUCO--PM", "libkernel", 1, "libkernel", 1, 1, sceKernelReadTsc);
LIB_FUNCTION("1j3S3n-tTW4", "libkernel", 1, "libkernel", 1, 1, sceKernelGetTscFrequency);
LIB_FUNCTION("n88vx3C5nW8", "libScePosix", 1, "libkernel", 1, 1, gettimeofday);
LIB_FUNCTION("n88vx3C5nW8", "libkernel", 1, "libkernel", 1, 1, gettimeofday);
LIB_FUNCTION("1jfXLRVzisc", "libkernel", 1, "libkernel", 1, 1, sceKernelUsleep);
LIB_FUNCTION("QcteRwbsnV0", "libScePosix", 1, "libkernel", 1, 1, posix_usleep);
LIB_FUNCTION("-ZR+hG7aDHw", "libkernel", 1, "libkernel", 1, 1, sceKernelSleep);
LIB_FUNCTION("0wu33hunNdE", "libScePosix", 1, "libkernel", 1, 1, sceKernelSleep);
LIB_FUNCTION("yS8U2TGCe1A", "libkernel", 1, "libkernel", 1, 1, posix_nanosleep);
}
} // namespace Libraries::Kernel

View File

@ -35,7 +35,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
}
// Align free position
free_addr = Common::AlignUp(free_addr, alignment);
free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
ASSERT(free_addr >= search_start && free_addr + size <= search_end);
// Add the allocated region to the list and commit its pages.

View File

@ -12,13 +12,17 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
const auto info = inst->Flags<IR::TextureInstInfo>();
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset) {
throw NotImplementedException("SPIR-V Instruction");
// Emits an explicit-LOD sample of the image/sampler pair packed into `handle`
// (low 16 bits: image index, high 16 bits: sampler index). The LOD operand is
// a constant 0.0; `bias_lc` and `offset` are not consumed here.
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
                              Id offset) {
    const auto& tex = ctx.images[handle & 0xFFFF];
    const Id image_id = ctx.OpLoad(tex.image_type, tex.id);
    const Id sampler_id = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
    const Id combined = ctx.OpSampledImage(tex.sampled_type, image_id, sampler_id);
    const Id lod_zero = ctx.ConstF32(0.f);
    return ctx.OpImageSampleExplicitLod(ctx.F32[4], combined, coords, spv::ImageOperandsMask::Lod,
                                        lod_zero);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
@ -26,9 +30,13 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id lod, const IR::Value& offset) {
throw NotImplementedException("SPIR-V Instruction");
// Emits a depth-compare, explicit-LOD sample of the image/sampler pair packed
// into `handle` (low 16 bits: image index, high 16 bits: sampler index). The
// LOD operand is a constant 0.0; `bias_lc` and `offset` are not consumed here.
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
                                  Id bias_lc, Id offset) {
    const auto& tex = ctx.images[handle & 0xFFFF];
    const Id image_id = ctx.OpLoad(tex.image_type, tex.id);
    const Id sampler_id = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
    const Id combined = ctx.OpSampledImage(tex.sampled_type, image_id, sampler_id);
    const Id lod_zero = ctx.ConstF32(0.f);
    return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], combined, coords, dref,
                                            spv::ImageOperandsMask::Lod, lod_zero);
}
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,

View File

@ -334,12 +334,12 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset);
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id bias_lc, const IR::Value& offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id lod, const IR::Value& offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id bias_lc,
Id offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,

View File

@ -823,6 +823,7 @@ IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
fmt::print("Tree:\n {}\n\n", DumpTree(root.children));
return syntax_list;
}

View File

@ -18,11 +18,19 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
}
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset =
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::U32 dword_offset = [&] -> IR::U32 {
if (smrd.imm) {
return ir.Imm32(smrd.offset);
}
if (smrd.offset == SQ_SRC_LITERAL) {
return ir.Imm32(inst.src[1].code);
}
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));

View File

@ -129,7 +129,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
}
break;
case OperandField::VccHi:
if (force_flt) {
value = ir.BitCast<IR::F32>(ir.GetVccHi());
} else {
value = ir.GetVccHi();
}
break;
default:
UNREACHABLE();
@ -297,6 +301,8 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_MADAK_F32: // Yes these can share the opcode
translator.V_FMA_F32(inst);
break;
case Opcode::IMAGE_SAMPLE_C_LZ:
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
translator.IMAGE_SAMPLE(inst);
break;
@ -351,9 +357,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;
case Opcode::S_CMP_LT_I32:
translator.S_CMP(ConditionOp::LT, true, inst);
break;
case Opcode::S_CMP_LG_I32:
translator.S_CMP(ConditionOp::LG, true, inst);
break;
case Opcode::S_CMP_GT_I32:
translator.S_CMP(ConditionOp::GT, true, inst);
break;
case Opcode::S_CMP_EQ_I32:
translator.S_CMP(ConditionOp::EQ, true, inst);
break;
@ -387,6 +399,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_SIN_F32:
translator.V_SIN_F32(inst);
break;
case Opcode::V_COS_F32:
translator.V_COS_F32(inst);
break;
case Opcode::V_LOG_F32:
translator.V_LOG_F32(inst);
break;
@ -522,6 +537,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_RNDNE_F32:
translator.V_RNDNE_F32(inst);
break;
case Opcode::V_BCNT_U32_B32:
translator.V_BCNT_U32_B32(inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:

View File

@ -104,6 +104,8 @@ public:
void V_ASHRREV_I32(const GcnInst& inst);
void V_MAD_U32_U24(const GcnInst& inst);
void V_RNDNE_F32(const GcnInst& inst);
void V_BCNT_U32_B32(const GcnInst& inst);
void V_COS_F32(const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

View File

@ -25,8 +25,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
}
void Translator::V_MUL_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
}
void Translator::V_CNDMASK_B32(const GcnInst& inst) {
@ -372,4 +371,15 @@ void Translator::V_RNDNE_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPRoundEven(src0));
}
void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1));
}
void Translator::V_COS_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPCos(src0));
}
} // namespace Shader::Gcn

View File

@ -31,7 +31,7 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
ASSERT(!mimg.da);
// ASSERT(!mimg.da);
IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg dest_reg{inst.dst[0].code};

View File

@ -129,6 +129,7 @@ void IREmitter::SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value) {
template <>
U32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
ASSERT(reg < IR::ScalarReg::Max);
return Inst<U32>(Opcode::GetScalarRegister, reg);
}

View File

@ -148,6 +148,9 @@ private:
} // Anonymous namespace
SharpLocation TrackSharp(const IR::Inst* inst) {
while (inst->GetOpcode() == IR::Opcode::Phi) {
inst = inst->Arg(0).InstRecursive();
}
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return SharpLocation{
.sgpr_base = u32(IR::ScalarReg::Max),
@ -163,6 +166,12 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
// Retrieve SGPR pair that holds sbase
const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
while (sbase0->GetOpcode() == IR::Opcode::Phi) {
sbase0 = sbase0->Arg(0).TryInstRecursive();
}
while (sbase1->GetOpcode() == IR::Opcode::Phi) {
sbase1 = sbase1->Arg(0).TryInstRecursive();
}
ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
sbase1->GetOpcode() == IR::Opcode::GetUserData,
"Nested resource loads not supported");

View File

@ -53,8 +53,15 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
fmt::print("Pre SSA passes\n\n{}\n", Shader::IR::DumpProgram(program));
std::fflush(stdout);
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
fmt::print("Post SSA passes\n\n{}\n", Shader::IR::DumpProgram(program));
std::fflush(stdout);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks);

View File

@ -0,0 +1,387 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "common/assert.h"
#include "video_core/amdgpu/gpuaddr/dataformat.h"
// Builds the data format describing an FMASK surface for the given sample and
// fragment counts.
//
// Combinations without a matching format keep GNM_IMG_DATA_FORMAT_INVALID.
// For sample counts 2, 4, 8 and 16 a fragment count outside {1, 2, 4, 8}
// aborts; a sample count of 1 is returned as invalid without looking at the
// fragment count; any other sample count hits UNREACHABLE().
GnmDataFormat gnmDfInitFromFmask(uint32_t numsamples, uint32_t numfrags) {
    constexpr GnmImageFormat kNone = GNM_IMG_DATA_FORMAT_INVALID;

    // One row per supported sample count; columns are 1, 2, 4 and 8 fragments.
    static constexpr GnmImageFormat kTwoSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK8_S2_F1,
        GNM_IMG_DATA_FORMAT_FMASK8_S2_F2,
        kNone,
        kNone,
    };
    static constexpr GnmImageFormat kFourSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK8_S4_F1,
        GNM_IMG_DATA_FORMAT_FMASK8_S4_F2,
        GNM_IMG_DATA_FORMAT_FMASK8_S4_F4,
        kNone,
    };
    static constexpr GnmImageFormat kEightSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK8_S8_F1,
        GNM_IMG_DATA_FORMAT_FMASK16_S8_F2,
        GNM_IMG_DATA_FORMAT_FMASK32_S8_F4,
        GNM_IMG_DATA_FORMAT_FMASK32_S8_F8,
    };
    static constexpr GnmImageFormat kSixteenSamples[4] = {
        GNM_IMG_DATA_FORMAT_FMASK16_S16_F1,
        GNM_IMG_DATA_FORMAT_FMASK32_S16_F2,
        GNM_IMG_DATA_FORMAT_FMASK64_S16_F4,
        GNM_IMG_DATA_FORMAT_FMASK64_S16_F8,
    };

    // Select the row first so an unsupported sample count is reported before
    // the fragment count is examined.
    const GnmImageFormat* row = nullptr;
    switch (numsamples) {
    case 1:
        // invalid
        break;
    case 2:
        row = kTwoSamples;
        break;
    case 4:
        row = kFourSamples;
        break;
    case 8:
        row = kEightSamples;
        break;
    case 16:
        row = kSixteenSamples;
        break;
    default:
        UNREACHABLE();
    }

    GnmImageFormat surface = kNone;
    if (row != nullptr) {
        switch (numfrags) {
        case 1:
            surface = row[0];
            break;
        case 2:
            surface = row[1];
            break;
        case 4:
            surface = row[2];
            break;
        case 8:
            surface = row[3];
            break;
        default:
            abort();
        }
    }

    GnmDataFormat res = {
        .surfacefmt = surface,
        .chantype = GNM_IMG_NUM_FORMAT_UNORM,
        .chanx = GNM_CHAN_X,
        .chany = GNM_CHAN_X,
        .chanz = GNM_CHAN_CONSTANT0,
        .chanw = GNM_CHAN_CONSTANT1,
    };

    // 16-sample masks use a different Y/Z channel mapping.
    if (numsamples == 16) {
        res.chany = GNM_CHAN_Y;
        res.chanz = GNM_CHAN_CONSTANT1;
    }
    return res;
}
// Builds the single-channel data format matching a depth format:
// GNM_Z_16 -> 16-bit UNORM, GNM_Z_32_FLOAT -> 32-bit FLOAT, anything else
// (including GNM_Z_INVALID) -> invalid surface format with UNORM channel type.
GnmDataFormat gnmDfInitFromZ(GnmZFormat zfmt) {
    GnmImageFormat surface = GNM_IMG_DATA_FORMAT_INVALID;
    GnmImgNumFormat numeric = GNM_IMG_NUM_FORMAT_UNORM;

    if (zfmt == GNM_Z_16) {
        surface = GNM_IMG_DATA_FORMAT_16;
    } else if (zfmt == GNM_Z_32_FLOAT) {
        surface = GNM_IMG_DATA_FORMAT_32;
        numeric = GNM_IMG_NUM_FORMAT_FLOAT;
    }

    // Depth lives in X; the remaining channels are constants.
    GnmDataFormat fmt = {
        .surfacefmt = surface,
        .chantype = numeric,
        .chanx = GNM_CHAN_X,
        .chany = GNM_CHAN_CONSTANT0,
        .chanz = GNM_CHAN_CONSTANT0,
        .chanw = GNM_CHAN_CONSTANT1,
    };
    return fmt;
}
uint32_t gnmDfGetNumComponents(const GnmDataFormat datafmt) {
switch (datafmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_INVALID:
return 0;
case GNM_IMG_DATA_FORMAT_8:
case GNM_IMG_DATA_FORMAT_16:
case GNM_IMG_DATA_FORMAT_32:
case GNM_IMG_DATA_FORMAT_BC4:
case GNM_IMG_DATA_FORMAT_1:
case GNM_IMG_DATA_FORMAT_1_REVERSED:
return 1;
case GNM_IMG_DATA_FORMAT_8_8:
case GNM_IMG_DATA_FORMAT_16_16:
case GNM_IMG_DATA_FORMAT_32_32:
case GNM_IMG_DATA_FORMAT_8_24:
case GNM_IMG_DATA_FORMAT_24_8:
case GNM_IMG_DATA_FORMAT_X24_8_32:
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S8_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F4:
case GNM_IMG_DATA_FORMAT_FMASK16_S16_F1:
case GNM_IMG_DATA_FORMAT_FMASK16_S8_F2:
case GNM_IMG_DATA_FORMAT_FMASK32_S16_F2:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F4:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F8:
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F4:
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F8:
case GNM_IMG_DATA_FORMAT_4_4:
return 2;
case GNM_IMG_DATA_FORMAT_10_11_11:
case GNM_IMG_DATA_FORMAT_11_11_10:
case GNM_IMG_DATA_FORMAT_32_32_32:
case GNM_IMG_DATA_FORMAT_5_6_5:
case GNM_IMG_DATA_FORMAT_GB_GR:
case GNM_IMG_DATA_FORMAT_BG_RG:
case GNM_IMG_DATA_FORMAT_5_9_9_9:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_6_5_5:
return 3;
case GNM_IMG_DATA_FORMAT_10_10_10_2:
case GNM_IMG_DATA_FORMAT_2_10_10_10:
case GNM_IMG_DATA_FORMAT_8_8_8_8:
case GNM_IMG_DATA_FORMAT_16_16_16_16:
case GNM_IMG_DATA_FORMAT_32_32_32_32:
case GNM_IMG_DATA_FORMAT_1_5_5_5:
case GNM_IMG_DATA_FORMAT_5_5_5_1:
case GNM_IMG_DATA_FORMAT_4_4_4_4:
case GNM_IMG_DATA_FORMAT_BC1:
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
case GNM_IMG_DATA_FORMAT_BC7:
return 4;
default:
UNREACHABLE();
}
}
uint32_t gnmDfGetBitsPerElement(const GnmDataFormat datafmt) {
switch (datafmt.surfacefmt) {
case GNM_IMG_DATA_FORMAT_INVALID:
return 0;
case GNM_IMG_DATA_FORMAT_8:
return 8;
case GNM_IMG_DATA_FORMAT_16:
case GNM_IMG_DATA_FORMAT_8_8:
return 16;
case GNM_IMG_DATA_FORMAT_32:
case GNM_IMG_DATA_FORMAT_16_16:
case GNM_IMG_DATA_FORMAT_10_11_11:
case GNM_IMG_DATA_FORMAT_11_11_10:
case GNM_IMG_DATA_FORMAT_10_10_10_2:
case GNM_IMG_DATA_FORMAT_2_10_10_10:
case GNM_IMG_DATA_FORMAT_8_8_8_8:
return 32;
case GNM_IMG_DATA_FORMAT_32_32:
case GNM_IMG_DATA_FORMAT_16_16_16_16:
return 64;
case GNM_IMG_DATA_FORMAT_32_32_32:
return 96;
case GNM_IMG_DATA_FORMAT_32_32_32_32:
return 128;
case GNM_IMG_DATA_FORMAT_5_6_5:
case GNM_IMG_DATA_FORMAT_1_5_5_5:
case GNM_IMG_DATA_FORMAT_5_5_5_1:
case GNM_IMG_DATA_FORMAT_4_4_4_4:
return 16;
case GNM_IMG_DATA_FORMAT_8_24:
case GNM_IMG_DATA_FORMAT_24_8:
return 32;
case GNM_IMG_DATA_FORMAT_X24_8_32:
return 64;
case GNM_IMG_DATA_FORMAT_GB_GR:
case GNM_IMG_DATA_FORMAT_BG_RG:
return 16;
case GNM_IMG_DATA_FORMAT_5_9_9_9:
return 32;
case GNM_IMG_DATA_FORMAT_BC1:
return 4;
case GNM_IMG_DATA_FORMAT_BC2:
case GNM_IMG_DATA_FORMAT_BC3:
return 8;
case GNM_IMG_DATA_FORMAT_BC4:
return 4;
case GNM_IMG_DATA_FORMAT_BC5:
case GNM_IMG_DATA_FORMAT_BC6:
case GNM_IMG_DATA_FORMAT_BC7:
return 8;
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S8_F1:
case GNM_IMG_DATA_FORMAT_FMASK8_S2_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F2:
case GNM_IMG_DATA_FORMAT_FMASK8_S4_F4:
return 8;
case GNM_IMG_DATA_FORMAT_FMASK16_S16_F1:
case GNM_IMG_DATA_FORMAT_FMASK16_S8_F2:
return 16;
case GNM_IMG_DATA_FORMAT_FMASK32_S16_F2:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F4:
case GNM_IMG_DATA_FORMAT_FMASK32_S8_F8:
return 32;
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F4:
case GNM_IMG_DATA_FORMAT_FMASK64_S16_F8:
return 64;
case GNM_IMG_DATA_FORMAT_4_4:
return 8;
case GNM_IMG_DATA_FORMAT_6_5_5:
return 16;
case GNM_IMG_DATA_FORMAT_1:
case GNM_IMG_DATA_FORMAT_1_REVERSED:
return 1;
default:
UNREACHABLE();
}
}
// Translates the format's channel type into the render-target number type.
//
// Writes the result to *out and returns true when the channel type has a
// render-target equivalent (UNORM, SNORM, UINT, SINT, FLOAT, SRGB). For any
// other channel type, returns false and leaves *out untouched.
// `out` must not be null.
bool gnmDfGetRtChannelType(const GnmDataFormat datafmt, GnmSurfaceNumber* out) {
    switch (datafmt.chantype) {
    case GNM_IMG_NUM_FORMAT_UNORM:
        *out = GNM_NUMBER_UNORM;
        break;
    case GNM_IMG_NUM_FORMAT_SNORM:
        *out = GNM_NUMBER_SNORM;
        break;
    case GNM_IMG_NUM_FORMAT_UINT:
        // The assignment used to sit above the case label, where it could never
        // execute, so UINT formats reported success without writing *out.
        *out = GNM_NUMBER_UINT;
        break;
    case GNM_IMG_NUM_FORMAT_SINT:
        *out = GNM_NUMBER_SINT;
        break;
    case GNM_IMG_NUM_FORMAT_FLOAT:
        *out = GNM_NUMBER_FLOAT;
        break;
    case GNM_IMG_NUM_FORMAT_SRGB:
        *out = GNM_NUMBER_SRGB;
        break;
    default:
        return false;
    }
    return true;
}
// Derives the render-target component swap mode from the format's channel
// selectors.
//
// The component count of the surface format decides which selector patterns
// are recognised. On a match the swap mode is written to *out and true is
// returned; otherwise false is returned and *out is left untouched.
// `out` must not be null.
bool gnmDfGetRtChannelOrder(const GnmDataFormat datafmt, GnmSurfaceSwap* out) {
    const uint32_t numcomps = gnmDfGetNumComponents(datafmt);
    const GnmChannel cx = datafmt.chanx;
    const GnmChannel cy = datafmt.chany;
    const GnmChannel cz = datafmt.chanz;
    const GnmChannel cw = datafmt.chanw;

    if (numcomps == 1) {
        // The single source channel may be routed to any one output slot.
        if (cx == GNM_CHAN_X) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cy == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cz == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cw == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    } else if (numcomps == 2) {
        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cx == GNM_CHAN_X && cw == GNM_CHAN_Y) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cx == GNM_CHAN_Y && cy == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cx == GNM_CHAN_Y && cw == GNM_CHAN_X) {
            // Reversed form of the ALT layout (x/w slots), mirroring the
            // three-component case below; this used to report STD_REV.
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    } else if (numcomps == 3) {
        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cz == GNM_CHAN_Z) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cw == GNM_CHAN_Z) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cz == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cw == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    } else if (numcomps == 4) {
        if (cx == GNM_CHAN_X && cy == GNM_CHAN_Y && cz == GNM_CHAN_Z && cw == GNM_CHAN_W) {
            *out = GNM_SWAP_STD;
            return true;
        } else if (cx == GNM_CHAN_Z && cy == GNM_CHAN_Y && cz == GNM_CHAN_X && cw == GNM_CHAN_W) {
            *out = GNM_SWAP_ALT;
            return true;
        } else if (cx == GNM_CHAN_W && cy == GNM_CHAN_Z && cz == GNM_CHAN_Y && cw == GNM_CHAN_X) {
            *out = GNM_SWAP_STD_REV;
            return true;
        } else if (cx == GNM_CHAN_Y && cy == GNM_CHAN_Z && cz == GNM_CHAN_W && cw == GNM_CHAN_X) {
            *out = GNM_SWAP_ALT_REV;
            return true;
        }
    }
    // Zero components, or a selector pattern with no swap-mode equivalent.
    return false;
}
// Maps a surface format to the depth format it can back, or GNM_Z_INVALID
// when the surface format is not one of 16, 24_8 or 32.
GnmZFormat gnmDfGetZFormat(const GnmDataFormat datafmt) {
    const GnmImageFormat surface = datafmt.surfacefmt;
    if (surface == GNM_IMG_DATA_FORMAT_16) {
        return GNM_Z_16;
    }
    if (surface == GNM_IMG_DATA_FORMAT_24_8) {
        return GNM_Z_24;
    }
    if (surface == GNM_IMG_DATA_FORMAT_32) {
        return GNM_Z_32_FLOAT;
    }
    return GNM_Z_INVALID;
}
// Maps a surface format to its stencil format: only the 8-bit surface format
// yields GNM_STENCIL_8, everything else is GNM_STENCIL_INVALID.
GnmStencilFormat gnmDfGetStencilFormat(const GnmDataFormat datafmt) {
    const bool is8bit = datafmt.surfacefmt == GNM_IMG_DATA_FORMAT_8;
    return is8bit ? GNM_STENCIL_8 : GNM_STENCIL_INVALID;
}

View File

@ -0,0 +1,409 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include "common/types.h"
// Number type of a render target; this is the value written by
// gnmDfGetRtChannelType. Note the values are not contiguous (0x2 and 0x3 are
// not defined here).
enum GnmSurfaceNumber {
GNM_NUMBER_UNORM = 0x0,
GNM_NUMBER_SNORM = 0x1,
GNM_NUMBER_UINT = 0x4,
GNM_NUMBER_SINT = 0x5,
GNM_NUMBER_SRGB = 0x6,
GNM_NUMBER_FLOAT = 0x7,
};
// Surface (storage layout) formats, stored in GnmDataFormat::surfacefmt.
// The numbering has gaps: 0xf, 0x17-0x1f and 0x2a-0x2b are not defined here.
// NOTE(review): the 32_AS_* entries are not handled by gnmDfGetNumComponents /
// gnmDfGetBitsPerElement and would hit their UNREACHABLE() default.
enum GnmImageFormat {
GNM_IMG_DATA_FORMAT_INVALID = 0x0,
GNM_IMG_DATA_FORMAT_8 = 0x1,
GNM_IMG_DATA_FORMAT_16 = 0x2,
GNM_IMG_DATA_FORMAT_8_8 = 0x3,
GNM_IMG_DATA_FORMAT_32 = 0x4,
GNM_IMG_DATA_FORMAT_16_16 = 0x5,
GNM_IMG_DATA_FORMAT_10_11_11 = 0x6,
GNM_IMG_DATA_FORMAT_11_11_10 = 0x7,
GNM_IMG_DATA_FORMAT_10_10_10_2 = 0x8,
GNM_IMG_DATA_FORMAT_2_10_10_10 = 0x9,
GNM_IMG_DATA_FORMAT_8_8_8_8 = 0xa,
GNM_IMG_DATA_FORMAT_32_32 = 0xb,
GNM_IMG_DATA_FORMAT_16_16_16_16 = 0xc,
GNM_IMG_DATA_FORMAT_32_32_32 = 0xd,
GNM_IMG_DATA_FORMAT_32_32_32_32 = 0xe,
GNM_IMG_DATA_FORMAT_5_6_5 = 0x10,
GNM_IMG_DATA_FORMAT_1_5_5_5 = 0x11,
GNM_IMG_DATA_FORMAT_5_5_5_1 = 0x12,
GNM_IMG_DATA_FORMAT_4_4_4_4 = 0x13,
GNM_IMG_DATA_FORMAT_8_24 = 0x14,
GNM_IMG_DATA_FORMAT_24_8 = 0x15,
GNM_IMG_DATA_FORMAT_X24_8_32 = 0x16,
GNM_IMG_DATA_FORMAT_GB_GR = 0x20,
GNM_IMG_DATA_FORMAT_BG_RG = 0x21,
GNM_IMG_DATA_FORMAT_5_9_9_9 = 0x22,
// Block-compressed formats (see gnmDfIsBlockCompressed).
GNM_IMG_DATA_FORMAT_BC1 = 0x23,
GNM_IMG_DATA_FORMAT_BC2 = 0x24,
GNM_IMG_DATA_FORMAT_BC3 = 0x25,
GNM_IMG_DATA_FORMAT_BC4 = 0x26,
GNM_IMG_DATA_FORMAT_BC5 = 0x27,
GNM_IMG_DATA_FORMAT_BC6 = 0x28,
GNM_IMG_DATA_FORMAT_BC7 = 0x29,
// FMASK formats, named FMASK<bits>_S<samples>_F<fragments>
// (see gnmDfInitFromFmask for the sample/fragment mapping).
GNM_IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c,
GNM_IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d,
GNM_IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e,
GNM_IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f,
GNM_IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30,
GNM_IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31,
GNM_IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32,
GNM_IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33,
GNM_IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34,
GNM_IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35,
GNM_IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36,
GNM_IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37,
GNM_IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38,
GNM_IMG_DATA_FORMAT_4_4 = 0x39,
GNM_IMG_DATA_FORMAT_6_5_5 = 0x3a,
GNM_IMG_DATA_FORMAT_1 = 0x3b,
GNM_IMG_DATA_FORMAT_1_REVERSED = 0x3c,
GNM_IMG_DATA_FORMAT_32_AS_8 = 0x3d,
GNM_IMG_DATA_FORMAT_32_AS_8_8 = 0x3e,
GNM_IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f,
};
// Channel (numeric) type of a format, stored in GnmDataFormat::chantype.
// Value 0x8 is not defined here. Only UNORM, SNORM, UINT, SINT, FLOAT and SRGB
// are translated by gnmDfGetRtChannelType.
enum GnmImgNumFormat {
GNM_IMG_NUM_FORMAT_UNORM = 0x0,
GNM_IMG_NUM_FORMAT_SNORM = 0x1,
GNM_IMG_NUM_FORMAT_USCALED = 0x2,
GNM_IMG_NUM_FORMAT_SSCALED = 0x3,
GNM_IMG_NUM_FORMAT_UINT = 0x4,
GNM_IMG_NUM_FORMAT_SINT = 0x5,
GNM_IMG_NUM_FORMAT_SNORM_OGL = 0x6,
GNM_IMG_NUM_FORMAT_FLOAT = 0x7,
GNM_IMG_NUM_FORMAT_SRGB = 0x9,
GNM_IMG_NUM_FORMAT_UBNORM = 0xa,
GNM_IMG_NUM_FORMAT_UBNORM_OGL = 0xb,
GNM_IMG_NUM_FORMAT_UBINT = 0xc,
GNM_IMG_NUM_FORMAT_UBSCALED = 0xd,
};
// Depth-buffer formats; converted to/from GnmDataFormat by gnmDfInitFromZ and
// gnmDfGetZFormat.
enum GnmZFormat {
GNM_Z_INVALID = 0x0,
GNM_Z_16 = 0x1,
GNM_Z_24 = 0x2,
GNM_Z_32_FLOAT = 0x3,
};
// Stencil-buffer formats; converted to/from GnmDataFormat by
// gnmDfInitFromStencil and gnmDfGetStencilFormat.
enum GnmStencilFormat {
GNM_STENCIL_INVALID = 0x0,
GNM_STENCIL_8 = 0x1,
};
// Per-component selector stored in GnmDataFormat::chanx/chany/chanz/chanw:
// either a constant (0 or 1) or one of the source channels X/Y/Z/W.
// Values 0x2 and 0x3 are not defined here; all values fit the 3-bit fields.
enum GnmChannel {
GNM_CHAN_CONSTANT0 = 0x0,
GNM_CHAN_CONSTANT1 = 0x1,
GNM_CHAN_X = 0x4,
GNM_CHAN_Y = 0x5,
GNM_CHAN_Z = 0x6,
GNM_CHAN_W = 0x7,
};
// Render-target component ordering, as computed by gnmDfGetRtChannelOrder from
// a format's channel selectors.
enum GnmSurfaceSwap {
GNM_SWAP_STD = 0x0,
GNM_SWAP_ALT = 0x1,
GNM_SWAP_STD_REV = 0x2,
GNM_SWAP_ALT_REV = 0x3,
};
// A complete data format packed into 32 bits: surface format, channel type and
// one channel selector per output component. The bit-fields add up to
// 8 + 4 + 3*4 + 8 = 32 bits; `asuint` exposes the same storage as one integer.
union GnmDataFormat {
struct {
GnmImageFormat surfacefmt : 8; // storage layout
GnmImgNumFormat chantype : 4; // numeric interpretation of the channels
GnmChannel chanx : 3; // selector for the first output component
GnmChannel chany : 3; // selector for the second output component
GnmChannel chanz : 3; // selector for the third output component
GnmChannel chanw : 3; // selector for the fourth output component
uint32_t _unused : 8;
};
uint32_t asuint;
};
// The packed layout must stay exactly one 32-bit word.
static_assert(sizeof(GnmDataFormat) == 0x4, "");
GnmDataFormat gnmDfInitFromFmask(uint32_t numsamples, uint32_t numfrags);
GnmDataFormat gnmDfInitFromZ(GnmZFormat zfmt);
// Builds the data format for a stencil buffer. GNM_STENCIL_8 maps to the 8-bit
// surface format, anything else to INVALID; all four selectors read channel X.
static inline GnmDataFormat gnmDfInitFromStencil(GnmStencilFormat stencilfmt,
                                                 GnmImgNumFormat chantype) {
    GnmImageFormat surface = GNM_IMG_DATA_FORMAT_INVALID;
    if (stencilfmt == GNM_STENCIL_8) {
        surface = GNM_IMG_DATA_FORMAT_8;
    }
    GnmDataFormat fmt = {
        .surfacefmt = surface,
        .chantype = chantype,
        .chanx = GNM_CHAN_X,
        .chany = GNM_CHAN_X,
        .chanz = GNM_CHAN_X,
        .chanw = GNM_CHAN_X,
    };
    return fmt;
}
// Number of texels covered by one element: 16 for the BC formats, 8 for the
// 1-bit formats, 1 for everything else.
static inline uint32_t gnmDfGetTexelsPerElement(const GnmDataFormat datafmt) {
    uint32_t texels = 1;
    switch (datafmt.surfacefmt) {
    case GNM_IMG_DATA_FORMAT_BC1:
    case GNM_IMG_DATA_FORMAT_BC2:
    case GNM_IMG_DATA_FORMAT_BC3:
    case GNM_IMG_DATA_FORMAT_BC4:
    case GNM_IMG_DATA_FORMAT_BC5:
    case GNM_IMG_DATA_FORMAT_BC6:
    case GNM_IMG_DATA_FORMAT_BC7:
        texels = 16;
        break;
    case GNM_IMG_DATA_FORMAT_1:
    case GNM_IMG_DATA_FORMAT_1_REVERSED:
        texels = 8;
        break;
    default:
        break;
    }
    return texels;
}
uint32_t gnmDfGetNumComponents(const GnmDataFormat datafmt);
uint32_t gnmDfGetBitsPerElement(const GnmDataFormat datafmt);
// Bits per element multiplied by the number of texels one element covers.
static inline uint32_t gnmDfGetTotalBitsPerElement(const GnmDataFormat fmt) {
    return gnmDfGetBitsPerElement(fmt) * gnmDfGetTexelsPerElement(fmt);
}
// Bits per element converted to whole bytes. The division truncates, so
// formats reporting fewer than 8 bits yield 0.
static inline uint32_t gnmDfGetBytesPerElement(const GnmDataFormat datafmt) {
    const uint32_t bits = gnmDfGetBitsPerElement(datafmt);
    return bits / 8;
}
// Total bits per element converted to whole bytes (truncating division).
static inline uint32_t gnmDfGetTotalBytesPerElement(const GnmDataFormat fmt) {
    const uint32_t totalbits = gnmDfGetTotalBitsPerElement(fmt);
    return totalbits / 8;
}
// True when the surface format is one of BC1..BC7.
static inline bool gnmDfIsBlockCompressed(const GnmDataFormat datafmt) {
    bool compressed = false;
    switch (datafmt.surfacefmt) {
    case GNM_IMG_DATA_FORMAT_BC1:
    case GNM_IMG_DATA_FORMAT_BC2:
    case GNM_IMG_DATA_FORMAT_BC3:
    case GNM_IMG_DATA_FORMAT_BC4:
    case GNM_IMG_DATA_FORMAT_BC5:
    case GNM_IMG_DATA_FORMAT_BC6:
    case GNM_IMG_DATA_FORMAT_BC7:
        compressed = true;
        break;
    default:
        break;
    }
    return compressed;
}
bool gnmDfGetRtChannelType(const GnmDataFormat datafmt, GnmSurfaceNumber* out);
bool gnmDfGetRtChannelOrder(const GnmDataFormat datafmt, GnmSurfaceSwap* out);
GnmZFormat gnmDfGetZFormat(const GnmDataFormat datafmt);
GnmStencilFormat gnmDfGetStencilFormat(const GnmDataFormat datafmt);
// Width, in texels, of one element: 4 for BC formats, 8 for the 1-bit formats,
// 2 for GB_GR/BG_RG, 1 otherwise.
static inline uint32_t gnmDfGetTexelsPerElementWide(const GnmDataFormat fmt) {
    uint32_t width = 1;
    switch (fmt.surfacefmt) {
    case GNM_IMG_DATA_FORMAT_BC1:
    case GNM_IMG_DATA_FORMAT_BC2:
    case GNM_IMG_DATA_FORMAT_BC3:
    case GNM_IMG_DATA_FORMAT_BC4:
    case GNM_IMG_DATA_FORMAT_BC5:
    case GNM_IMG_DATA_FORMAT_BC6:
    case GNM_IMG_DATA_FORMAT_BC7:
        width = 4;
        break;
    case GNM_IMG_DATA_FORMAT_1:
    case GNM_IMG_DATA_FORMAT_1_REVERSED:
        width = 8;
        break;
    case GNM_IMG_DATA_FORMAT_GB_GR:
    case GNM_IMG_DATA_FORMAT_BG_RG:
        width = 2;
        break;
    default:
        break;
    }
    return width;
}
// Height, in texels, of one element: 4 for BC formats, 1 otherwise.
static inline uint32_t gnmDfGetTexelsPerElementTall(const GnmDataFormat fmt) {
    uint32_t height = 1;
    switch (fmt.surfacefmt) {
    case GNM_IMG_DATA_FORMAT_BC1:
    case GNM_IMG_DATA_FORMAT_BC2:
    case GNM_IMG_DATA_FORMAT_BC3:
    case GNM_IMG_DATA_FORMAT_BC4:
    case GNM_IMG_DATA_FORMAT_BC5:
    case GNM_IMG_DATA_FORMAT_BC6:
    case GNM_IMG_DATA_FORMAT_BC7:
        height = 4;
        break;
    default:
        break;
    }
    return height;
}
// Predefined data formats. Each constant pairs a surface format and channel
// type with the channel selectors implied by its name (e.g. B8G8R8A8 selects
// Z, Y, X, W). Components a format does not provide are filled with the
// CONSTANT0/CONSTANT1 selectors.
// Being `static const` in a header, every translation unit that includes this
// file gets its own copy of these objects.
static const GnmDataFormat GNM_FMT_INVALID = {
.surfacefmt = GNM_IMG_DATA_FORMAT_INVALID,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_CONSTANT0,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT0,
};
static const GnmDataFormat GNM_FMT_R8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
// Alpha-only: the single stored channel is routed to W, the rest read 0.
static const GnmDataFormat GNM_FMT_A8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_CONSTANT0,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_X,
};
static const GnmDataFormat GNM_FMT_R8G8B8A8_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R8G8B8A8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R8G8B8A8_UINT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_UINT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_B8G8R8A8_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_Z,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_X,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_B8G8R8A8_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_8_8_8_8,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_Z,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_X,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R16_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_16,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_R16G16B16A16_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_16_16_16_16,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R16G16B16A16_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_16_16_16_16,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_CONSTANT0,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_R32G32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_CONSTANT0,
.chanw = GNM_CHAN_CONSTANT1,
};
// NOTE(review): chanw is CONSTANT0 here, whereas the other formats without a
// stored W channel (e.g. GNM_FMT_R32G32B32_FLOAT) use CONSTANT1 - confirm this
// difference is intentional.
static const GnmDataFormat GNM_FMT_R32G32B32_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT0,
};
static const GnmDataFormat GNM_FMT_R32G32B32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_R32G32B32A32_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R32G32B32A32_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_R32G32B32A32_FLOAT = {
.surfacefmt = GNM_IMG_DATA_FORMAT_32_32_32_32,
.chantype = GNM_IMG_NUM_FORMAT_FLOAT,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_BC6_SNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC6,
.chantype = GNM_IMG_NUM_FORMAT_SNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_BC6_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC6,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_CONSTANT1,
};
static const GnmDataFormat GNM_FMT_BC7_UNORM = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC7,
.chantype = GNM_IMG_NUM_FORMAT_UNORM,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};
static const GnmDataFormat GNM_FMT_BC7_SRGB = {
.surfacefmt = GNM_IMG_DATA_FORMAT_BC7,
.chantype = GNM_IMG_NUM_FORMAT_SRGB,
.chanx = GNM_CHAN_X,
.chany = GNM_CHAN_Y,
.chanz = GNM_CHAN_Z,
.chanw = GNM_CHAN_W,
};

View File

@ -0,0 +1,82 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h"
// Computes the byte address of an element inside a linearly laid out surface.
//
// Slices are stored back to back, with all slices of sample 0 first, then all
// slices of sample 1, and so on. The element index is scaled by `bpp` to get a
// bit address; the byte part is returned and the remaining bit offset (0-7) is
// written to *pBitPosition when that pointer is non-null.
uint64_t gpaComputeSurfaceAddrFromCoordLinear(
    uint32_t x,            ///< [in] x coord
    uint32_t y,            ///< [in] y coord
    uint32_t slice,        ///< [in] slice/depth index
    uint32_t sample,       ///< [in] sample index
    uint32_t bpp,          ///< [in] bits per pixel
    uint32_t pitch,        ///< [in] pitch
    uint32_t height,       ///< [in] height
    uint32_t numSlices,    ///< [in] number of slices
    uint32_t* pBitPosition ///< [out] bit position inside a byte
) {
    const uint64_t sliceSize = static_cast<uint64_t>(pitch) * height;
    // Widen before multiplying so the slice index cannot wrap at 32 bits.
    const uint64_t sliceIndex = static_cast<uint64_t>(sample) * numSlices + slice;
    const uint64_t sliceOffset = sliceIndex * sliceSize;
    const uint64_t rowOffset = static_cast<uint64_t>(y) * pitch;
    const uint64_t pixOffset = x;

    const uint64_t bitAddr = (sliceOffset + rowOffset + pixOffset) * bpp;
    if (pBitPosition) {
        *pBitPosition = static_cast<uint32_t>(bitAddr % 8);
    }
    return bitAddr / 8;
}
// Computes the size of one surface (a single mip level of a single array
// slice) and its offset from the start of the texture.
//
// The walk goes through mip levels 0..miplevel: every level before the target
// contributes `numslices * surfacesize` to the offset, and the target level
// contributes `arrayslice * surfacesize`.
//
// outsize   - optional; receives the surface size of the requested mip level.
// outoffset - optional; receives the offset of the requested surface.
// tex       - texture description; must not be null.
// Returns GPA_ERR_INVALID_ARGS for a null `tex`, otherwise the first error
// reported by gpaTpInit / gpaComputeSurfaceInfo, or GPA_ERR_OK.
GpaError gpaCalcSurfaceSizeOffset(uint64_t* outsize, uint64_t* outoffset, const GpaTextureInfo* tex,
                                  uint32_t miplevel, uint32_t arrayslice) {
    if (!tex) {
        return GPA_ERR_INVALID_ARGS;
    }

    const uint32_t numarrayslices = tex->numslices;
    const uint32_t basewidth = tex->width;
    const uint32_t baseheight = tex->height;
    const uint32_t basedepth = tex->depth;
    const uint32_t basepitch = tex->pitch;

    GpaTilingParams tp = {};
    GpaError res = gpaTpInit(&tp, tex, 0, arrayslice);
    if (res != GPA_ERR_OK) {
        return res;
    }

    GpaSurfaceInfo si = {0};
    // Accumulate in 64 bits: the results are returned as uint64_t and large
    // textures would otherwise be truncated.
    uint64_t finaloffset = 0;
    uint64_t finalsize = 0;
    for (uint32_t m = 0; m <= miplevel; m += 1) {
        // Skip over all array slices of the previous mip level.
        finaloffset += numarrayslices * finalsize;

        tp.linearwidth = std::max(basewidth >> m, 1U);
        tp.linearheight = std::max(baseheight >> m, 1U);
        tp.lineardepth = basedepth;
        tp.basetiledpitch = basepitch;
        tp.miplevel = m;

        res = gpaComputeSurfaceInfo(&si, &tp);
        if (res != GPA_ERR_OK) {
            return res;
        }
        finalsize = si.surfacesize;
    }

    // Step to the requested slice inside the target mip level.
    finaloffset += finalsize * arrayslice;

    if (outsize) {
        *outsize = finalsize;
    }
    if (outoffset) {
        *outoffset = finaloffset;
    }
    return GPA_ERR_OK;
}

View File

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "video_core/amdgpu/gpuaddr/error.h"
// Returns a human-readable description of `err`, or an empty string for a
// value that is not a known GpaError.
std::string_view gpaStrError(const GpaError err) {
    std::string_view message;
    switch (err) {
    case GPA_ERR_OK:
        message = "No error";
        break;
    case GPA_ERR_INVALID_ARGS:
        message = "An invalid argument was used";
        break;
    case GPA_ERR_OVERFLOW:
        message = "A buffer has overflown";
        break;
    case GPA_ERR_TILING_ERROR:
        message = "An internal tiling error occured";
        break;
    case GPA_ERR_UNSUPPORTED:
        message = "A requested feature is unsupported";
        break;
    case GPA_ERR_INTERNAL_ERROR:
        message = "An internal error occured";
        break;
    case GPA_ERR_NOT_COMPRESSED:
        message = "The texture is not compressed";
        break;
    default:
        message = "";
        break;
    }
    return message;
}

View File

@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include <string_view>
// Result codes returned by the gpa* functions; GPA_ERR_OK (0) means success.
// gpaStrError provides a description for each value.
enum GpaError {
GPA_ERR_OK = 0, // no error
GPA_ERR_INVALID_ARGS, // an invalid argument was used
GPA_ERR_OVERFLOW, // a buffer has overflown
GPA_ERR_TILING_ERROR, // an internal tiling error occurred
GPA_ERR_UNSUPPORTED, // a requested feature is unsupported
GPA_ERR_INTERNAL_ERROR, // an internal error occurred
GPA_ERR_NOT_COMPRESSED, // the texture is not compressed
};
std::string_view gpaStrError(const GpaError err);

View File

@ -0,0 +1,74 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include "video_core/amdgpu/gpuaddr/error.h"
#include "video_core/amdgpu/gpuaddr/types.h"
//
// Surface
//
GpaError gpaComputeSurfaceInfo(GpaSurfaceInfo* out, const GpaTilingParams* tp);
GpaError gpaComputeHtileInfo(GpaHtileInfo* outinfo, const GpaHtileParams* params);
GpaError gpaComputeCmaskInfo(GpaCmaskInfo* outinfo, const GpaCmaskParams* params);
GpaError gpaComputeFmaskInfo(GpaFmaskInfo* outinfo, const GpaFmaskParams* params);
GpaError gpaComputeSurfaceTileMode(GnmTileMode* outtilemode, GnmGpuMode mingpumode,
GnmArrayMode arraymode, GpaSurfaceFlags flags,
GnmDataFormat surfacefmt, u32 numfragsperpixel,
GnmMicroTileMode mtm);
//
// Surface generation
//
GpaError gpaFindOptimalSurface(GpaSurfaceProperties* outprops, GpaSurfaceType surfacetype, u32 bpp,
u32 numfrags, bool mipmapped, GnmGpuMode mingpumode);
//
// Element/Utility
//
uint64_t gpaComputeSurfaceAddrFromCoordLinear(u32 x, u32 y, u32 slice, u32 sample, u32 bpp,
u32 pitch, u32 height, u32 numSlices,
u32* pBitPosition);
GpaError gpaCalcSurfaceSizeOffset(uint64_t* outsize, uint64_t* outoffset, const GpaTextureInfo* tex,
u32 miplevel, u32 arrayslice);
GpaError gpaGetTileInfo(GpaTileInfo* outinfo, GnmTileMode tilemode, u32 bpp, u32 numfrags,
GnmGpuMode gpumode);
GpaError gpaComputeBaseSwizzle(u32* outswizzle, GnmTileMode tilemode, u32 surfindex, u32 bpp,
u32 numfrags, GnmGpuMode gpumode);
//
// Decompression
//
GpaError gpaGetDecompressedSize(uint64_t* outsize, const void* inbuffer, size_t inbuffersize,
const GpaTextureInfo* texinfo);
GpaError gpaDecompressTexture(void* outbuffer, uint64_t outbuffersize, const void* inbuffer,
uint64_t inbuffersize, const GpaTextureInfo* texinfo,
GnmDataFormat* outfmt);
//
// Tiler
//
GpaError gpaTpInit(GpaTilingParams* tp, const GpaTextureInfo* tex, u32 miplevel, u32 arrayslice);
GpaError gpaTileSurface(void* outtile, size_t outtilesize, const void* inuntile,
size_t inuntilesize, const GpaTilingParams* tp);
GpaError gpaTileSurfaceRegion(void* outtile, size_t outtilesize, const void* inuntile,
size_t inuntilesize, const GpaTilingParams* tp,
const GpaSurfaceRegion* region, u32 srcpitch, u32 srcslicepitch);
GpaError gpaTileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo, u32 mip,
u32 slice);
GpaError gpaTileTextureAll(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo);
GpaError gpaDetileSurface(void* outuntile, size_t outuntilesize, const void* intile,
size_t intilesize, const GpaTilingParams* tp);
GpaError gpaDetileSurfaceRegion(void* outuntile, size_t outuntilesize, const void* intile,
size_t intilesize, const GpaTilingParams* tp,
const GpaSurfaceRegion* region, u32 dstpitch, u32 dstslicepitch);
GpaError gpaDetileTextureIndexed(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo, u32 mip,
u32 slice);
GpaError gpaDetileTextureAll(const void* inbuffer, size_t inbuffersize, void* outbuffer,
size_t outbuffersize, const GpaTextureInfo* texinfo);

View File

@ -0,0 +1,145 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include "video_core/amdgpu/gpuaddr/gpuaddr.h"
// Tiling geometry and cache-size constants shared by the gpuaddr implementation.
constexpr u32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling
constexpr u32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling
constexpr u32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes
constexpr u32 XThickTileThickness = 8; ///< Extra thick tiling thickness
constexpr u32 PowerSaveTileBytes = 64; ///< Number of bytes per tile for power save 64
constexpr u32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache
constexpr u32 CmaskElemBits = 4; ///< Number of bits for CMASK element
constexpr u32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32
constexpr u32 MicroTilePixels = MicroTileWidth * MicroTileHeight; ///< Pixels in one 8x8 micro tile
constexpr u32 Block64K = 0x10000; ///< 64 KiB, in bytes
constexpr u32 PrtTileSize = Block64K;
constexpr u32 PIPE_INTERLEAVE_BYTES = 256;
constexpr u32 BANK_INTERLEAVE = 1;
constexpr u32 BLOCK_SIZE = 4; ///< Used by getblockpitch / getelemsperblock*
constexpr u32 MICROTILE_SIZE = 8;
constexpr u32 TILE_SIZE = 8; ///< Used by gettilepitch
constexpr u32 DRAM_ROW_SIZE = 1024;
// Quick log2 for the small powers of two 1, 2, 4, 8 and 16.
// Any other input (including 0 and larger powers of two) yields 0.
static inline uint32_t QLog2(uint32_t x) {
    for (uint32_t exponent = 1; exponent <= 4; ++exponent) {
        if (x == (1u << exponent)) {
            return exponent;
        }
    }
    // x == 1 and every unsupported value land here.
    return 0;
}
// True when x is a non-zero power of two (exactly one bit set).
static inline bool IsPow2(const uint32_t x) {
    if (x == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves nothing only for powers of two.
    return (x & (x - 1)) == 0;
}
// Rounds x up to the next power of two (x itself when it already is one).
// The arithmetic wraps: 0 and values above 0x80000000 produce 0.
static inline uint32_t NextPow2(uint32_t x) {
    uint32_t mask = x - 1;
    // Smear the highest set bit into every lower position.
    for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
        mask |= mask >> shift;
    }
    return mask + 1;
}
// Rounds x up to a multiple of `align` using bit masking; the mask trick is
// only meaningful when `align` is a power of two.
static inline uint32_t PowTwoAlign32(uint32_t x, uint32_t align) {
    const uint32_t mask = align - 1;
    return (x + mask) & ~mask;
}
// Converts a bit count to bytes, rounding up to a whole byte.
static inline uint32_t BitsToBytes32(uint32_t x) {
    const uint32_t rounded = x + 7u;
    return rounded >> 3;
}
// 64-bit variant: converts a bit count to bytes, rounding up to a whole byte.
static inline uint64_t BitsToBytes64(uint64_t x) {
    const uint64_t rounded = x + 7u;
    return rounded >> 3;
}
// Converts a byte count to bits (times eight, wrapping at 32 bits).
static inline uint32_t BytesToBits32(uint32_t x) {
    return x << 3;
}
// 64-bit variant: converts a byte count to bits (times eight).
static inline uint64_t BytesToBits64(uint64_t x) {
    return x << 3;
}
GnmArrayMode gpaGetArrayMode(GnmTileMode tilemode);
GnmMicroTileMode gpaGetMicroTileMode(GnmTileMode tilemode);
GnmPipeConfig gpaGetPipeConfig(GnmTileMode tilemode);
GnmPipeConfig gpaGetAltPipeConfig(GnmTileMode tilemode);
GnmSampleSplit gpaGetSampleSplit(GnmTileMode tilemode);
GnmTileSplit gpaGetTileSplit(GnmTileMode tilemode);
GpaError gpaCalcSurfaceMacrotileMode(GnmMacroTileMode* outmtm, GnmTileMode tilemode,
uint32_t bitsperelem, uint32_t numfragsperpixel);
GpaError gpaAdjustTileMode(GnmTileMode* outtilemode, GnmTileMode oldtilemode,
GnmArrayMode newarraymode);
uint32_t gpaGetMicroTileThickness(GnmArrayMode arraymode);
bool gpaIsLinear(GnmArrayMode arraymode);
bool gpaIsMicroTiled(GnmArrayMode arraymode);
bool gpaIsMacroTiled(GnmArrayMode arraymode);
bool gpaIsPrt(GnmArrayMode arraymode);
GnmBankWidth gpaGetBankWidth(GnmMacroTileMode mtm);
//
// BASE mode macrotilemode stuff
//
GnmBankHeight gpaGetBankHeight(GnmMacroTileMode mtm);
GnmNumBanks gpaGetNumBanks(GnmMacroTileMode mtm);
GnmMacroTileAspect gpaGetMacrotileAspect(GnmMacroTileMode mtm);
//
// NEO mode macrotilemode stuff
//
GnmBankHeight gpaGetAltBankHeight(GnmMacroTileMode mtm);
GnmNumBanks gpaGetAltNumBanks(GnmMacroTileMode mtm);
GnmMacroTileAspect gpaGetAltMacrotileAspect(GnmMacroTileMode mtm);
uint32_t gpaGetPipeCount(GnmPipeConfig pipecfg);
// BLOCK_SIZE times the total bytes per element, divided by the element width
// in texels.
static inline uint32_t getblockpitch(const GnmDataFormat fmt) {
    const uint32_t elembytes = gnmDfGetTotalBytesPerElement(fmt);
    const uint32_t elemwidth = gnmDfGetTexelsPerElementWide(fmt);
    const uint32_t blockbytes = BLOCK_SIZE * elembytes;
    return blockbytes / elemwidth;
}
// TILE_SIZE times the total bytes per element, divided by the element width
// in texels.
static inline uint32_t gettilepitch(const GnmDataFormat fmt) {
    const uint32_t elembytes = gnmDfGetTotalBytesPerElement(fmt);
    const uint32_t elemwidth = gnmDfGetTexelsPerElementWide(fmt);
    const uint32_t tilebytes = TILE_SIZE * elembytes;
    return tilebytes / elemwidth;
}
// Number of elements that fit across BLOCK_SIZE texels horizontally.
static inline uint32_t getelemsperblockwide(const GnmDataFormat fmt) {
    return BLOCK_SIZE / gnmDfGetTexelsPerElementWide(fmt);
}
// Number of elements that fit across BLOCK_SIZE texels vertically.
static inline uint32_t getelemsperblocktall(const GnmDataFormat fmt) {
    return BLOCK_SIZE / gnmDfGetTexelsPerElementTall(fmt);
}
// Computes the tile split size in bytes: (64 << split) times the byte size of
// one micro tile at `bpp` bits per pixel and the given thickness, with a
// lower bound of 256 bytes.
static inline uint32_t GetTileSplitBytes(GnmTileSplit split, uint32_t bpp, uint32_t thickness) {
    const uint32_t microTileBits = bpp * MicroTilePixels * thickness;
    const uint32_t microTileBytes = BitsToBytes32(microTileBits);
    // Non-depth entries store a split factor
    const uint32_t splitFactor = 64 << split;
    const uint32_t splitBytes = splitFactor * microTileBytes;
    return std::max(256u, splitBytes);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,203 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h"
GpaError gpaFindOptimalSurface(GpaSurfaceProperties* outprops, GpaSurfaceType surfacetype,
uint32_t bpp, uint32_t numfrags, bool mipmapped,
GnmGpuMode mingpumode) {
if (!outprops) {
return GPA_ERR_INVALID_ARGS;
}
GpaSurfaceFlags flags = {0};
switch (surfacetype) {
case GPA_SURFACE_COLORDISPLAY:
flags.display = 1;
break;
case GPA_SURFACE_COLOR:
break;
case GPA_SURFACE_DEPTHSTENCIL:
flags.depthtarget = 1;
flags.stenciltarget = 1;
break;
case GPA_SURFACE_DEPTH:
flags.depthtarget = 1;
break;
case GPA_SURFACE_STENCIL:
flags.stenciltarget = 1;
break;
case GPA_SURFACE_FMASK:
flags.fmask = 1;
break;
case GPA_SURFACE_TEXTUREFLAT:
case GPA_SURFACE_RWTEXTUREFLAT:
flags.pow2pad = mipmapped;
flags.texcompatible = mingpumode == GNM_GPU_NEO;
break;
case GPA_SURFACE_TEXTUREVOLUME:
case GPA_SURFACE_RWTEXTUREVOLUME:
flags.volume = 1;
flags.pow2pad = mipmapped;
flags.texcompatible = mingpumode == GNM_GPU_NEO;
break;
case GPA_SURFACE_TEXTURECUBEMAP:
case GPA_SURFACE_RWTEXTURECUBEMAP:
flags.cube = 1;
flags.pow2pad = mipmapped;
flags.texcompatible = mingpumode == GNM_GPU_NEO;
break;
default:
return GPA_ERR_INVALID_ARGS;
}
/* Set the requested tiling mode. */
GnmArrayMode arraymode = GNM_ARRAY_LINEAR_GENERAL;
switch (surfacetype) {
case GPA_SURFACE_COLORDISPLAY:
case GPA_SURFACE_COLOR:
case GPA_SURFACE_DEPTHSTENCIL:
case GPA_SURFACE_DEPTH:
case GPA_SURFACE_STENCIL:
case GPA_SURFACE_FMASK:
arraymode = flags.prt ? GNM_ARRAY_PRT_2D_TILED_THIN1 : GNM_ARRAY_2D_TILED_THIN1;
break;
case GPA_SURFACE_TEXTUREFLAT:
case GPA_SURFACE_RWTEXTUREFLAT:
case GPA_SURFACE_TEXTURECUBEMAP:
case GPA_SURFACE_RWTEXTURECUBEMAP:
/* MSAA requires 2D tiling. */
if (flags.prt) {
arraymode = numfrags > 1 ? GNM_ARRAY_PRT_2D_TILED_THIN1 : GNM_ARRAY_PRT_TILED_THIN1;
} else {
arraymode = numfrags > 1 ? GNM_ARRAY_2D_TILED_THIN1 : GNM_ARRAY_1D_TILED_THIN1;
}
break;
case GPA_SURFACE_TEXTUREVOLUME:
case GPA_SURFACE_RWTEXTUREVOLUME:
arraymode = flags.prt ? GNM_ARRAY_PRT_TILED_THICK : GNM_ARRAY_1D_TILED_THICK;
break;
default:
return GPA_ERR_INVALID_ARGS;
}
/* Set the micro tile type. */
GnmMicroTileMode microtilemode = GNM_SURF_THIN_MICRO_TILING;
if (flags.display)
microtilemode = GNM_SURF_DISPLAY_MICRO_TILING;
else if (flags.depthtarget || flags.stenciltarget)
microtilemode = GNM_SURF_DEPTH_MICRO_TILING;
/* Find the tile mode type */
GnmTileMode tilemode = GNM_TM_DEPTH_2D_THIN_64;
if (microtilemode == GNM_SURF_DEPTH_MICRO_TILING) {
const uint32_t tilesize = gpaGetMicroTileThickness(arraymode) * bpp * numfrags *
MICROTILE_SIZE * MICROTILE_SIZE / 8;
if (mingpumode == GNM_GPU_NEO && DRAM_ROW_SIZE < tilesize) {
flags.texcompatible = 0;
}
if (flags.depthtarget && flags.texcompatible) {
switch (tilesize) {
case 128:
tilemode = GNM_TM_DEPTH_2D_THIN_128;
break;
case 256:
tilemode = GNM_TM_DEPTH_2D_THIN_256;
break;
case 512:
tilemode = GNM_TM_DEPTH_2D_THIN_512;
break;
default:
tilemode = GNM_TM_DEPTH_2D_THIN_1K;
break;
}
} else {
switch (numfrags) {
case 1:
tilemode = GNM_TM_DEPTH_2D_THIN_64;
break;
case 2:
case 4:
tilemode = GNM_TM_DEPTH_2D_THIN_128;
break;
case 8:
tilemode = GNM_TM_DEPTH_2D_THIN_256;
break;
default:
return GPA_ERR_INVALID_ARGS;
}
}
switch (arraymode) {
case GNM_ARRAY_1D_TILED_THIN1:
tilemode = GNM_TM_DEPTH_1D_THIN;
break;
case GNM_ARRAY_PRT_TILED_THIN1:
tilemode = GNM_TM_DEPTH_2D_THIN_PRT_256;
break;
default:
break;
}
if (flags.depthtarget && !flags.stenciltarget && mingpumode == GNM_GPU_NEO &&
tilemode < GNM_TM_DEPTH_2D_THIN_256) {
tilemode = GNM_TM_DEPTH_2D_THIN_256;
}
} else if (microtilemode == GNM_SURF_DISPLAY_MICRO_TILING) {
if (arraymode == GNM_ARRAY_1D_TILED_THIN1) {
tilemode = GNM_TM_DISPLAY_1D_THIN;
} else if (arraymode == GNM_ARRAY_2D_TILED_THIN1) {
tilemode = GNM_TM_DISPLAY_2D_THIN;
} else if (arraymode == GNM_ARRAY_PRT_TILED_THIN1) {
tilemode = GNM_TM_DISPLAY_THIN_PRT;
} else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) {
tilemode = GNM_TM_DISPLAY_2D_THIN_PRT;
} else {
tilemode = GNM_TM_DISPLAY_1D_THIN;
}
} else if (microtilemode == GNM_SURF_THIN_MICRO_TILING) {
if (arraymode == GNM_ARRAY_1D_TILED_THIN1) {
tilemode = GNM_TM_THIN_1D_THIN;
} else if (arraymode == GNM_ARRAY_2D_TILED_THIN1) {
tilemode = GNM_TM_THIN_2D_THIN;
} else if (arraymode == GNM_ARRAY_3D_TILED_THIN1) {
tilemode = GNM_TM_THIN_3D_THIN;
} else if (arraymode == GNM_ARRAY_PRT_TILED_THIN1) {
tilemode = GNM_TM_THIN_THIN_PRT;
} else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THIN1) {
tilemode = GNM_TM_THIN_2D_THIN_PRT;
} else if (arraymode == GNM_ARRAY_PRT_3D_TILED_THIN1) {
tilemode = GNM_TM_THIN_3D_THIN_PRT;
} else {
tilemode = GNM_TM_THIN_1D_THIN;
}
} else if (microtilemode == GNM_SURF_THICK_MICRO_TILING) {
if (arraymode == GNM_ARRAY_1D_TILED_THICK) {
tilemode = GNM_TM_THICK_1D_THICK;
} else if (arraymode == GNM_ARRAY_2D_TILED_THICK) {
tilemode = GNM_TM_THICK_2D_THICK;
} else if (arraymode == GNM_ARRAY_3D_TILED_THICK) {
tilemode = GNM_TM_THICK_3D_THICK;
} else if (arraymode == GNM_ARRAY_PRT_TILED_THICK) {
tilemode = GNM_TM_THICK_THICK_PRT;
} else if (arraymode == GNM_ARRAY_PRT_2D_TILED_THICK) {
tilemode = GNM_TM_THICK_2D_THICK_PRT;
} else if (arraymode == GNM_ARRAY_PRT_3D_TILED_THICK) {
tilemode = GNM_TM_THICK_3D_THICK_PRT;
} else if (arraymode == GNM_ARRAY_2D_TILED_XTHICK) {
tilemode = GNM_TM_THICK_2D_XTHICK;
} else if (arraymode == GNM_ARRAY_3D_TILED_XTHICK) {
tilemode = GNM_TM_THICK_3D_XTHICK;
} else {
tilemode = GNM_TM_THICK_1D_THICK;
}
} else if (microtilemode == GNM_SURF_ROTATED_MICRO_TILING) {
return GPA_ERR_INTERNAL_ERROR;
}
*outprops = (GpaSurfaceProperties){
.tilemode = tilemode,
.flags = flags,
};
return GPA_ERR_OK;
}

View File

@ -0,0 +1,815 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#include <cmath>
#include "video_core/amdgpu/gpuaddr/gpuaddr_private.h"
/// Maps a tile mode to the array mode it uses.
/// Several tile modes share one array mode (for example every non-PRT DEPTH_2D_THIN_* mode,
/// DISPLAY_2D_THIN and THIN_2D_THIN all yield GNM_ARRAY_2D_TILED_THIN1).
/// Calls abort() for any value that is not one of the listed GnmTileMode enumerators.
/// NOTE(review): GNM_TM_THIN_3D_THIN_PRT maps to the non-PRT GNM_ARRAY_3D_TILED_THIN1, whereas
/// GNM_TM_THICK_3D_THICK_PRT maps to a PRT array mode -- presumably this mirrors the hardware
/// tile mode table; confirm before "fixing" the asymmetry.
GnmArrayMode gpaGetArrayMode(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_THIN_1D_THIN:
return GNM_ARRAY_1D_TILED_THIN1;
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_2D_THIN_128:
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_512:
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_THIN_2D_THIN:
return GNM_ARRAY_2D_TILED_THIN1;
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_THIN_THIN_PRT:
return GNM_ARRAY_PRT_TILED_THIN1;
case GNM_TM_DEPTH_2D_THIN_PRT_256:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_THIN_2D_THIN_PRT:
return GNM_ARRAY_PRT_2D_TILED_THIN1;
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_3D_THIN_PRT:
return GNM_ARRAY_3D_TILED_THIN1;
case GNM_TM_THICK_1D_THICK:
return GNM_ARRAY_1D_TILED_THICK;
case GNM_TM_THICK_2D_THICK:
return GNM_ARRAY_2D_TILED_THICK;
case GNM_TM_THICK_3D_THICK:
return GNM_ARRAY_3D_TILED_THICK;
case GNM_TM_THICK_THICK_PRT:
return GNM_ARRAY_PRT_TILED_THICK;
case GNM_TM_THICK_2D_THICK_PRT:
return GNM_ARRAY_PRT_2D_TILED_THICK;
case GNM_TM_THICK_3D_THICK_PRT:
return GNM_ARRAY_PRT_3D_TILED_THICK;
case GNM_TM_THICK_2D_XTHICK:
return GNM_ARRAY_2D_TILED_XTHICK;
case GNM_TM_THICK_3D_XTHICK:
return GNM_ARRAY_3D_TILED_XTHICK;
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
return GNM_ARRAY_LINEAR_ALIGNED;
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_ARRAY_LINEAR_GENERAL;
default:
// Not a known tile mode: treated as a programming error.
abort();
}
}
/// Returns the micro tiling category (depth / display / thin / thick) of a tile mode.
/// Relies on the GnmTileMode numbering: the categories occupy consecutive value ranges, with
/// GNM_TM_DISPLAY_LINEAR_GENERAL as the one display mode outside its range.
/// Calls abort() for values that are not GnmTileMode enumerators.
GnmMicroTileMode gpaGetMicroTileMode(GnmTileMode tilemode) {
    // The only enumerator that sits outside the contiguous block.
    if (tilemode == GNM_TM_DISPLAY_LINEAR_GENERAL) {
        return GNM_SURF_DISPLAY_MICRO_TILING;
    }
    if (tilemode < GNM_TM_DEPTH_2D_THIN_64 || tilemode > GNM_TM_THICK_3D_XTHICK) {
        abort();
    }
    // DEPTH_2D_THIN_64 .. DEPTH_2D_THIN_PRT_1K
    if (tilemode <= GNM_TM_DEPTH_2D_THIN_PRT_1K) {
        return GNM_SURF_DEPTH_MICRO_TILING;
    }
    // DISPLAY_LINEAR_ALIGNED .. DISPLAY_2D_THIN_PRT
    if (tilemode <= GNM_TM_DISPLAY_2D_THIN_PRT) {
        return GNM_SURF_DISPLAY_MICRO_TILING;
    }
    // THIN_1D_THIN .. THIN_3D_THIN_PRT
    if (tilemode <= GNM_TM_THIN_3D_THIN_PRT) {
        return GNM_SURF_THIN_MICRO_TILING;
    }
    // THICK_1D_THICK .. THICK_3D_XTHICK
    return GNM_SURF_THICK_MICRO_TILING;
}
/// Returns the pipe configuration associated with a tile mode.
/// gpaGetTileInfo uses this table when the GPU mode is not GNM_GPU_NEO (gpaGetAltPipeConfig
/// is used otherwise).
/// Every mode maps to one of the two 8-pipe configurations, except
/// GNM_TM_DISPLAY_LINEAR_GENERAL which maps to GNM_ADDR_SURF_P2.
/// Calls abort() for values that are not GnmTileMode enumerators.
GnmPipeConfig gpaGetPipeConfig(GnmTileMode tilemode) {
switch (tilemode) {
case GNM_TM_DEPTH_2D_THIN_64:
case GNM_TM_DEPTH_2D_THIN_128:
case GNM_TM_DEPTH_2D_THIN_256:
case GNM_TM_DEPTH_2D_THIN_512:
case GNM_TM_DEPTH_2D_THIN_1K:
case GNM_TM_DEPTH_1D_THIN:
case GNM_TM_DEPTH_2D_THIN_PRT_256:
case GNM_TM_DEPTH_2D_THIN_PRT_1K:
case GNM_TM_DISPLAY_LINEAR_ALIGNED:
case GNM_TM_DISPLAY_1D_THIN:
case GNM_TM_DISPLAY_2D_THIN:
case GNM_TM_DISPLAY_2D_THIN_PRT:
case GNM_TM_THIN_1D_THIN:
case GNM_TM_THIN_2D_THIN:
case GNM_TM_THIN_2D_THIN_PRT:
case GNM_TM_THIN_3D_THIN_PRT:
case GNM_TM_THICK_1D_THICK:
case GNM_TM_THICK_2D_THICK:
case GNM_TM_THICK_2D_THICK_PRT:
case GNM_TM_THICK_2D_XTHICK:
return GNM_ADDR_SURF_P8_32x32_16x16;
case GNM_TM_DISPLAY_THIN_PRT:
case GNM_TM_THIN_3D_THIN:
case GNM_TM_THIN_THIN_PRT:
case GNM_TM_THICK_3D_THICK:
case GNM_TM_THICK_THICK_PRT:
case GNM_TM_THICK_3D_THICK_PRT:
case GNM_TM_THICK_3D_XTHICK:
return GNM_ADDR_SURF_P8_32x32_8x16;
case GNM_TM_DISPLAY_LINEAR_GENERAL:
return GNM_ADDR_SURF_P2;
default:
// Not a known tile mode: treated as a programming error.
abort();
}
}
/// Returns the pipe configuration for a tile mode when the alternate (NEO) table is selected.
/// Every tile mode uses the 16-pipe configuration, except GNM_TM_DISPLAY_LINEAR_GENERAL which
/// uses GNM_ADDR_SURF_P2. Calls abort() for values that are not GnmTileMode enumerators.
GnmPipeConfig gpaGetAltPipeConfig(GnmTileMode tilemode) {
    if (tilemode == GNM_TM_DISPLAY_LINEAR_GENERAL) {
        return GNM_ADDR_SURF_P2;
    }
    // All remaining enumerators form the contiguous range
    // GNM_TM_DEPTH_2D_THIN_64 (0x0) .. GNM_TM_THICK_3D_XTHICK (0x1a).
    const bool known =
        tilemode >= GNM_TM_DEPTH_2D_THIN_64 && tilemode <= GNM_TM_THICK_3D_XTHICK;
    if (!known) {
        abort();
    }
    return GNM_ADDR_SURF_P16_32x32_8x16;
}
/// Returns the sample split setting of a tile mode.
/// The macro-tiled DISPLAY and THIN modes use GNM_ADDR_SAMPLE_SPLIT_2; every other known tile
/// mode uses GNM_ADDR_SAMPLE_SPLIT_1. Calls abort() for values that are not GnmTileMode
/// enumerators.
GnmSampleSplit gpaGetSampleSplit(GnmTileMode tilemode) {
    switch (tilemode) {
    case GNM_TM_DISPLAY_2D_THIN:
    case GNM_TM_DISPLAY_THIN_PRT:
    case GNM_TM_DISPLAY_2D_THIN_PRT:
    case GNM_TM_THIN_2D_THIN:
    case GNM_TM_THIN_3D_THIN:
    case GNM_TM_THIN_THIN_PRT:
    case GNM_TM_THIN_2D_THIN_PRT:
    case GNM_TM_THIN_3D_THIN_PRT:
        return GNM_ADDR_SAMPLE_SPLIT_2;
    default:
        break;
    }
    // Everything else that is a real enumerator (0x0..0x1a, plus LINEAR_GENERAL) is split 1.
    const bool known =
        (tilemode >= GNM_TM_DEPTH_2D_THIN_64 && tilemode <= GNM_TM_THICK_3D_XTHICK) ||
        tilemode == GNM_TM_DISPLAY_LINEAR_GENERAL;
    if (!known) {
        abort();
    }
    return GNM_ADDR_SAMPLE_SPLIT_1;
}
/// Returns the tile split setting of a tile mode.
/// Only the DEPTH_2D_THIN_{128,256,512,1K} modes and their PRT variants have a split larger
/// than 64 bytes; every other known tile mode reports GNM_SURF_TILE_SPLIT_64B.
/// Calls abort() for values that are not GnmTileMode enumerators.
GnmTileSplit gpaGetTileSplit(GnmTileMode tilemode) {
    switch (tilemode) {
    case GNM_TM_DEPTH_2D_THIN_128:
        return GNM_SURF_TILE_SPLIT_128B;
    case GNM_TM_DEPTH_2D_THIN_256:
    case GNM_TM_DEPTH_2D_THIN_PRT_256:
        return GNM_SURF_TILE_SPLIT_256B;
    case GNM_TM_DEPTH_2D_THIN_512:
        return GNM_SURF_TILE_SPLIT_512B;
    case GNM_TM_DEPTH_2D_THIN_1K:
    case GNM_TM_DEPTH_2D_THIN_PRT_1K:
        return GNM_SURF_TILE_SPLIT_1KB;
    default:
        break;
    }
    // Remaining real enumerators: 0x0..0x1a (minus the ones above) and LINEAR_GENERAL.
    const bool known =
        (tilemode >= GNM_TM_DEPTH_2D_THIN_64 && tilemode <= GNM_TM_THICK_3D_XTHICK) ||
        tilemode == GNM_TM_DISPLAY_LINEAR_GENERAL;
    if (!known) {
        abort();
    }
    return GNM_SURF_TILE_SPLIT_64B;
}
/// Integer floor(log2(value)); returns 0 when value is 0 or 1.
static uint32_t floorlog2u32(uint32_t value) {
    uint32_t result = 0;
    while (value > 1) {
        value >>= 1;
        ++result;
    }
    return result;
}

/// Computes the macro tile mode index for a macro-tiled surface.
///
/// @param outmtm            Receives the macro tile mode; must not be null.
/// @param tilemode          Tile mode of the surface; its array mode must be macro tiled.
/// @param bitsperelem       Bits per element, 1..128.
/// @param numfragsperpixel  Fragments per pixel; must pass IsPow2 and be at most 16.
/// @return GPA_ERR_OK on success, GPA_ERR_INVALID_ARGS if any argument is rejected.
///
/// The index is log2(tilebytes / 64), offset by 8 for PRT array modes, where tilebytes is the
/// per-pixel tile size limited by the tile split and by DRAM_ROW_SIZE.
GpaError gpaCalcSurfaceMacrotileMode(GnmMacroTileMode* outmtm, GnmTileMode tilemode,
                                     uint32_t bitsperelem, uint32_t numfragsperpixel) {
    if (!outmtm) {
        return GPA_ERR_INVALID_ARGS;
    }
    if (!IsPow2(numfragsperpixel) || numfragsperpixel > 16) {
        return GPA_ERR_INVALID_ARGS;
    }
    if (bitsperelem < 1 || bitsperelem > 128) {
        return GPA_ERR_INVALID_ARGS;
    }
    const GnmArrayMode arraymode = gpaGetArrayMode(tilemode);
    if (!gpaIsMacroTiled(arraymode)) {
        return GPA_ERR_INVALID_ARGS;
    }
    const GnmMicroTileMode mtm = gpaGetMicroTileMode(tilemode);
    const GnmSampleSplit samplesplithw = gpaGetSampleSplit(tilemode);
    const GnmTileSplit tilesplithw = gpaGetTileSplit(tilemode);
    const uint32_t tilethickness = gpaGetMicroTileThickness(arraymode);
    // Size in bytes of one micro tile holding a single fragment per pixel.
    const uint32_t tilebytes1x = bitsperelem * MICROTILE_SIZE * MICROTILE_SIZE * tilethickness / 8;
    const uint32_t samplesplit = 1u << samplesplithw;
    const uint32_t colortilesplit = std::max(256U, samplesplit * tilebytes1x);
    // Depth surfaces take the split straight from the tile mode; others derive it from the
    // sample split.
    const uint32_t tilesplit =
        (mtm == GNM_SURF_DEPTH_MICRO_TILING) ? (64u << tilesplithw) : colortilesplit;
    const uint32_t tilesplitc = std::min(DRAM_ROW_SIZE, tilesplit);
    // Clamp to 64 bytes: for small elements (e.g. fewer than 8 bits with one fragment) the tile
    // is smaller than 64 bytes, and tilebytes / 64 would be 0, for which log2 is undefined.
    const uint32_t tilebytes =
        std::max(64U, std::min(tilesplitc, numfragsperpixel * tilebytes1x));
    // Integer log2 avoids the float round trip; tilebytes / 64 is >= 1 after the clamp.
    const uint32_t mtmidx = floorlog2u32(tilebytes / 64);
    *outmtm = GnmMacroTileMode(gpaIsPrt(arraymode) ? (mtmidx + 8) : mtmidx);
    return GPA_ERR_OK;
}
/// Picks the tile mode to use when a surface has to switch to a different array mode.
///
/// @param outtilemode  Receives the adjusted tile mode; must not be null.
/// @param oldtilemode  Tile mode currently in use.
/// @param newarraymode Array mode the surface should move to.
/// @return GPA_ERR_OK on success; GPA_ERR_INVALID_ARGS for a null output or an unhandled micro
///         tile mode; GPA_ERR_TILING_ERROR when a depth mode is asked for anything other than
///         1D thin; GPA_ERR_UNSUPPORTED when no tile mode exists for the requested array mode.
GpaError gpaAdjustTileMode(GnmTileMode* outtilemode, GnmTileMode oldtilemode,
                           GnmArrayMode newarraymode) {
    if (outtilemode == nullptr) {
        return GPA_ERR_INVALID_ARGS;
    }
    // Nothing to adjust when the array mode does not change.
    if (gpaGetArrayMode(oldtilemode) == newarraymode) {
        *outtilemode = oldtilemode;
        return GPA_ERR_OK;
    }

    switch (gpaGetMicroTileMode(oldtilemode)) {
    case GNM_SURF_DEPTH_MICRO_TILING:
        // Depth surfaces can only fall back to 1D thin.
        if (newarraymode != GNM_ARRAY_1D_TILED_THIN1) {
            return GPA_ERR_TILING_ERROR;
        }
        *outtilemode = GNM_TM_DEPTH_1D_THIN;
        return GPA_ERR_OK;

    case GNM_SURF_DISPLAY_MICRO_TILING:
        // Display surfaces likewise only have a 1D thin fallback.
        if (newarraymode != GNM_ARRAY_1D_TILED_THIN1) {
            return GPA_ERR_UNSUPPORTED;
        }
        *outtilemode = GNM_TM_DISPLAY_1D_THIN;
        return GPA_ERR_OK;

    case GNM_SURF_THICK_MICRO_TILING:
    case GNM_SURF_THIN_MICRO_TILING: {
        // Thick and thin surfaces share one mapping from array mode to tile mode.
        GnmTileMode adjusted = oldtilemode;
        switch (newarraymode) {
        case GNM_ARRAY_3D_TILED_THICK:
            adjusted = GNM_TM_THICK_3D_THICK;
            break;
        case GNM_ARRAY_2D_TILED_THICK:
            adjusted = GNM_TM_THICK_2D_THICK;
            break;
        case GNM_ARRAY_1D_TILED_THICK:
            adjusted = GNM_TM_THICK_1D_THICK;
            break;
        case GNM_ARRAY_3D_TILED_THIN1:
            adjusted = GNM_TM_THIN_3D_THIN;
            break;
        case GNM_ARRAY_PRT_3D_TILED_THIN1:
            adjusted = GNM_TM_THIN_3D_THIN_PRT;
            break;
        case GNM_ARRAY_2D_TILED_THIN1:
            adjusted = GNM_TM_THIN_2D_THIN;
            break;
        case GNM_ARRAY_PRT_2D_TILED_THIN1:
            adjusted = GNM_TM_THIN_2D_THIN_PRT;
            break;
        case GNM_ARRAY_PRT_TILED_THIN1:
            adjusted = GNM_TM_THIN_THIN_PRT;
            break;
        case GNM_ARRAY_1D_TILED_THIN1:
            adjusted = GNM_TM_THIN_1D_THIN;
            break;
        default:
            return GPA_ERR_UNSUPPORTED;
        }
        *outtilemode = adjusted;
        return GPA_ERR_OK;
    }

    case GNM_SURF_ROTATED_MICRO_TILING:
    default:
        return GPA_ERR_INVALID_ARGS;
    }
}
/// Returns the micro tile thickness for an array mode: 8 for XTHICK modes, 4 for THICK modes
/// and 1 for linear and THIN modes. Calls abort() for values that are not GnmArrayMode
/// enumerators.
uint32_t gpaGetMicroTileThickness(GnmArrayMode arraymode) {
    uint32_t thickness = 0;
    switch (arraymode) {
    case GNM_ARRAY_2D_TILED_XTHICK:
    case GNM_ARRAY_3D_TILED_XTHICK:
        thickness = 8;
        break;
    case GNM_ARRAY_1D_TILED_THICK:
    case GNM_ARRAY_2D_TILED_THICK:
    case GNM_ARRAY_3D_TILED_THICK:
    case GNM_ARRAY_PRT_TILED_THICK:
    case GNM_ARRAY_PRT_2D_TILED_THICK:
    case GNM_ARRAY_PRT_3D_TILED_THICK:
        thickness = 4;
        break;
    case GNM_ARRAY_LINEAR_GENERAL:
    case GNM_ARRAY_LINEAR_ALIGNED:
    case GNM_ARRAY_1D_TILED_THIN1:
    case GNM_ARRAY_2D_TILED_THIN1:
    case GNM_ARRAY_3D_TILED_THIN1:
    case GNM_ARRAY_PRT_TILED_THIN1:
    case GNM_ARRAY_PRT_2D_TILED_THIN1:
    case GNM_ARRAY_PRT_3D_TILED_THIN1:
        thickness = 1;
        break;
    default:
        abort();
    }
    return thickness;
}
/// Tells whether an array mode is one of the two linear (untiled) modes.
/// Calls abort() for values that are not GnmArrayMode enumerators.
bool gpaIsLinear(GnmArrayMode arraymode) {
    if (arraymode == GNM_ARRAY_LINEAR_GENERAL || arraymode == GNM_ARRAY_LINEAR_ALIGNED) {
        return true;
    }
    // Every other enumerator lies in GNM_ARRAY_1D_TILED_THIN1 .. GNM_ARRAY_PRT_3D_TILED_THICK
    // and is a tiled mode.
    const bool tiled =
        arraymode >= GNM_ARRAY_1D_TILED_THIN1 && arraymode <= GNM_ARRAY_PRT_3D_TILED_THICK;
    if (!tiled) {
        abort();
    }
    return false;
}
/// Tells whether an array mode is micro tiled only, i.e. one of the two 1D tiled modes.
/// Calls abort() for values that are not GnmArrayMode enumerators.
bool gpaIsMicroTiled(GnmArrayMode arraymode) {
    if (arraymode == GNM_ARRAY_1D_TILED_THIN1 || arraymode == GNM_ARRAY_1D_TILED_THICK) {
        return true;
    }
    // GnmArrayMode enumerators are contiguous from LINEAR_GENERAL to PRT_3D_TILED_THICK.
    const bool known =
        arraymode >= GNM_ARRAY_LINEAR_GENERAL && arraymode <= GNM_ARRAY_PRT_3D_TILED_THICK;
    if (!known) {
        abort();
    }
    return false;
}
/// Tells whether an array mode is macro tiled, i.e. neither linear nor 1D tiled.
/// Calls abort() for values that are not GnmArrayMode enumerators.
bool gpaIsMacroTiled(GnmArrayMode arraymode) {
    // GnmArrayMode enumerators are contiguous from LINEAR_GENERAL to PRT_3D_TILED_THICK.
    if (arraymode < GNM_ARRAY_LINEAR_GENERAL || arraymode > GNM_ARRAY_PRT_3D_TILED_THICK) {
        abort();
    }
    // The linear and 1D modes are the four lowest values; everything from 2D_TILED_THIN1
    // upwards is macro tiled.
    return arraymode >= GNM_ARRAY_2D_TILED_THIN1;
}
/// Tells whether an array mode is one of the 3D macro tiled modes (PRT or not, any thickness).
/// Calls abort() for values that are not GnmArrayMode enumerators.
static bool ismacrotiled3d(GnmArrayMode arraymode) {
    // GnmArrayMode enumerators are contiguous from LINEAR_GENERAL to PRT_3D_TILED_THICK.
    if (arraymode < GNM_ARRAY_LINEAR_GENERAL || arraymode > GNM_ARRAY_PRT_3D_TILED_THICK) {
        abort();
    }
    // The five 3D modes are the highest values, starting at PRT_3D_TILED_THIN1.
    return arraymode >= GNM_ARRAY_PRT_3D_TILED_THIN1;
}
/// Tells whether an array mode is one of the PRT array modes.
/// Calls abort() for values that are not GnmArrayMode enumerators.
bool gpaIsPrt(GnmArrayMode arraymode) {
    bool prt = false;
    switch (arraymode) {
    case GNM_ARRAY_PRT_TILED_THIN1:
    case GNM_ARRAY_PRT_2D_TILED_THIN1:
    case GNM_ARRAY_PRT_3D_TILED_THIN1:
    case GNM_ARRAY_PRT_TILED_THICK:
    case GNM_ARRAY_PRT_2D_TILED_THICK:
    case GNM_ARRAY_PRT_3D_TILED_THICK:
        prt = true;
        break;
    case GNM_ARRAY_LINEAR_GENERAL:
    case GNM_ARRAY_LINEAR_ALIGNED:
    case GNM_ARRAY_1D_TILED_THIN1:
    case GNM_ARRAY_1D_TILED_THICK:
    case GNM_ARRAY_2D_TILED_THIN1:
    case GNM_ARRAY_2D_TILED_THICK:
    case GNM_ARRAY_2D_TILED_XTHICK:
    case GNM_ARRAY_3D_TILED_THIN1:
    case GNM_ARRAY_3D_TILED_THICK:
    case GNM_ARRAY_3D_TILED_XTHICK:
        break;
    default:
        abort();
    }
    return prt;
}
//
// Macro tile mode parameter lookups used in BASE GPU mode
//
/// Returns the bank width for a macro tile mode. Every macro tile mode uses
/// GNM_SURF_BANK_WIDTH_1. Calls abort() for values that are not GnmMacroTileMode enumerators.
GnmBankWidth gpaGetBankWidth(GnmMacroTileMode mtm) {
    // GnmMacroTileMode enumerators are contiguous from 1x4_16 (0x0) to 1x1_2_DUP3 (0xf).
    const bool known = mtm >= GNM_MACROTILEMODE_1x4_16 && mtm <= GNM_MACROTILEMODE_1x1_2_DUP3;
    if (!known) {
        abort();
    }
    return GNM_SURF_BANK_WIDTH_1;
}
/// Returns the bank height for a macro tile mode, BASE-mode table.
/// gpaGetTileInfo uses this when the GPU mode is not GNM_GPU_NEO.
/// Calls abort() for values that are not GnmMacroTileMode enumerators.
GnmBankHeight gpaGetBankHeight(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_BANK_HEIGHT_1;
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x2_16_DUP:
return GNM_SURF_BANK_HEIGHT_2;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_BANK_HEIGHT_4;
case GNM_MACROTILEMODE_1x8_16:
return GNM_SURF_BANK_HEIGHT_8;
default:
// Not a known macro tile mode: treated as a programming error.
abort();
}
}
/// Returns the bank count for a macro tile mode, BASE-mode table.
/// gpaGetTileInfo uses this when the GPU mode is not GNM_GPU_NEO.
/// Calls abort() for values that are not GnmMacroTileMode enumerators.
GnmNumBanks gpaGetNumBanks(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_2_BANK;
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_4_DUP:
return GNM_SURF_4_BANK;
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_8_DUP:
return GNM_SURF_8_BANK;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
return GNM_SURF_16_BANK;
default:
// Not a known macro tile mode: treated as a programming error.
abort();
}
}
/// Returns the macro tile aspect ratio for a macro tile mode, BASE-mode table.
/// gpaGetTileInfo uses this when the GPU mode is not GNM_GPU_NEO.
/// Calls abort() for values that are not GnmMacroTileMode enumerators.
GnmMacroTileAspect gpaGetMacrotileAspect(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_MACRO_ASPECT_1;
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
return GNM_SURF_MACRO_ASPECT_2;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_MACRO_ASPECT_4;
default:
// Not a known macro tile mode: treated as a programming error.
abort();
}
}
//
// Macro tile mode parameter lookups used in NEO GPU mode
//
/// Returns the bank height for a macro tile mode, alternate (NEO-mode) table.
/// gpaGetTileInfo uses this when the GPU mode is GNM_GPU_NEO.
/// Unlike the BASE table, no macro tile mode maps to GNM_SURF_BANK_HEIGHT_8 here.
/// Calls abort() for values that are not GnmMacroTileMode enumerators.
GnmBankHeight gpaGetAltBankHeight(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_BANK_HEIGHT_1;
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
return GNM_SURF_BANK_HEIGHT_2;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_BANK_HEIGHT_4;
default:
// Not a known macro tile mode: treated as a programming error.
abort();
}
}
/// Returns the bank count for a macro tile mode, alternate (NEO-mode) table.
/// gpaGetTileInfo uses this when the GPU mode is GNM_GPU_NEO.
/// NOTE(review): the results do not follow the bank count in the enumerator names (e.g.
/// 1x1_2 -> 4 banks, 1x4_16 -> 8 banks) -- presumably the names describe the BASE table;
/// confirm against the hardware documentation.
/// Calls abort() for values that are not GnmMacroTileMode enumerators.
GnmNumBanks gpaGetAltNumBanks(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_2_BANK;
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
return GNM_SURF_4_BANK;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x4_16_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
return GNM_SURF_8_BANK;
case GNM_MACROTILEMODE_1x8_16:
return GNM_SURF_16_BANK;
default:
// Not a known macro tile mode: treated as a programming error.
abort();
}
}
/// Returns the macro tile aspect ratio for a macro tile mode, alternate (NEO-mode) table.
/// gpaGetTileInfo uses this when the GPU mode is GNM_GPU_NEO.
/// Only aspect ratios 1 and 2 are produced by this table.
/// Calls abort() for values that are not GnmMacroTileMode enumerators.
GnmMacroTileAspect gpaGetAltMacrotileAspect(GnmMacroTileMode mtm) {
switch (mtm) {
case GNM_MACROTILEMODE_1x1_16:
case GNM_MACROTILEMODE_1x1_16_DUP:
case GNM_MACROTILEMODE_1x1_8:
case GNM_MACROTILEMODE_1x1_4:
case GNM_MACROTILEMODE_1x1_2:
case GNM_MACROTILEMODE_1x1_2_DUP:
case GNM_MACROTILEMODE_1x2_16_DUP:
case GNM_MACROTILEMODE_1x1_16_DUP2:
case GNM_MACROTILEMODE_1x1_8_DUP:
case GNM_MACROTILEMODE_1x1_4_DUP:
case GNM_MACROTILEMODE_1x1_2_DUP2:
case GNM_MACROTILEMODE_1x1_2_DUP3:
return GNM_SURF_MACRO_ASPECT_1;
case GNM_MACROTILEMODE_1x4_16:
case GNM_MACROTILEMODE_1x2_16:
case GNM_MACROTILEMODE_1x8_16:
case GNM_MACROTILEMODE_1x4_16_DUP:
return GNM_SURF_MACRO_ASPECT_2;
default:
// Not a known macro tile mode: treated as a programming error.
abort();
}
}
/// Returns the number of pipes (2, 8 or 16) for the pipe configurations this library
/// produces. Calls abort() for any other GnmPipeConfig value, including the P4 and the
/// remaining P8/P16 enumerators.
uint32_t gpaGetPipeCount(GnmPipeConfig pipecfg) {
    uint32_t pipes = 0;
    switch (pipecfg) {
    case GNM_ADDR_SURF_P16_32x32_8x16:
        pipes = 16;
        break;
    case GNM_ADDR_SURF_P8_32x32_16x16:
    case GNM_ADDR_SURF_P8_32x32_8x16:
        pipes = 8;
        break;
    case GNM_ADDR_SURF_P2:
        pipes = 2;
        break;
    default:
        abort();
    }
    return pipes;
}
/// Collects the tiling parameters (array mode, banks, bank width/height, macro tile aspect,
/// tile split and pipe configuration) that apply to a tile mode.
///
/// @param outinfo  Receives the tile info; must not be null.
/// @param tilemode Tile mode to describe; must be a GnmTileMode enumerator.
/// @param bpp      Bits per element; only used for macro-tiled modes.
/// @param numfrags Fragments per pixel; only used for macro-tiled modes.
/// @param gpumode  Selects the BASE or the alternate (NEO) lookup tables.
/// @return GPA_ERR_OK on success, GPA_ERR_INVALID_ARGS for a null output or an unknown tile
///         mode, or the error reported by gpaCalcSurfaceMacrotileMode.
GpaError gpaGetTileInfo(GpaTileInfo* outinfo, GnmTileMode tilemode, uint32_t bpp, uint32_t numfrags,
                        GnmGpuMode gpumode) {
    // GnmTileMode has no enumerators between GNM_TM_THICK_3D_XTHICK (0x1a) and
    // GNM_TM_DISPLAY_LINEAR_GENERAL (0x1f). Reject that gap here, otherwise the lookup helpers
    // below would abort() instead of reporting an argument error.
    const bool knowntilemode =
        (tilemode >= GNM_TM_DEPTH_2D_THIN_64 && tilemode <= GNM_TM_THICK_3D_XTHICK) ||
        tilemode == GNM_TM_DISPLAY_LINEAR_GENERAL;
    if (!outinfo || !knowntilemode) {
        return GPA_ERR_INVALID_ARGS;
    }
    const GnmArrayMode arraymode = gpaGetArrayMode(tilemode);
    // Defaults reported for modes that are not macro tiled.
    GnmNumBanks banks = GNM_SURF_2_BANK;
    GnmBankWidth bankw = GNM_SURF_BANK_WIDTH_1;
    GnmBankHeight bankh = GNM_SURF_BANK_HEIGHT_1;
    GnmMacroTileAspect macroaspect = GNM_SURF_MACRO_ASPECT_1;
    const GnmTileSplit tilesplit = gpaGetTileSplit(tilemode);
    const GnmPipeConfig pipeconfig =
        gpumode == GNM_GPU_NEO ? gpaGetAltPipeConfig(tilemode) : gpaGetPipeConfig(tilemode);
    if (gpaIsMacroTiled(arraymode)) {
        GnmMacroTileMode macrotilemode = GNM_MACROTILEMODE_1x1_2;
        GpaError err = gpaCalcSurfaceMacrotileMode(&macrotilemode, tilemode, bpp, numfrags);
        if (err != GPA_ERR_OK) {
            return err;
        }
        if (gpumode == GNM_GPU_NEO) {
            banks = gpaGetAltNumBanks(macrotilemode);
            bankh = gpaGetAltBankHeight(macrotilemode);
            macroaspect = gpaGetAltMacrotileAspect(macrotilemode);
        } else {
            banks = gpaGetNumBanks(macrotilemode);
            bankh = gpaGetBankHeight(macrotilemode);
            macroaspect = gpaGetMacrotileAspect(macrotilemode);
        }
        // Bank width has a single table shared by both GPU modes.
        bankw = gpaGetBankWidth(macrotilemode);
    }
    // Standard C++20 designated initialisation (the C-style compound literal is a compiler
    // extension in C++). Designators follow the member declaration order of GpaTileInfo.
    *outinfo = GpaTileInfo{
        .arraymode = arraymode,
        .banks = banks,
        .bankwidth = bankw,
        .bankheight = bankh,
        .macroaspectratio = macroaspect,
        .tilesplit = tilesplit,
        .pipeconfig = pipeconfig,
    };
    return GPA_ERR_OK;
}
/// Combines a bank swizzle and a pipe swizzle into one tile swizzle, XORs it (scaled by
/// PIPE_INTERLEAVE_BYTES) into baseAddr and returns the result shifted right by 8 bits,
/// truncated to 32 bits.
static uint32_t GetBankPipeSwizzle(uint32_t bankSwizzle, uint32_t pipeSwizzle, uint64_t baseAddr,
                                   const GpaTileInfo* tileinfo) {
    const uint32_t pipeShift = QLog2(gpaGetPipeCount(tileinfo->pipeconfig));
    const uint32_t bankInterleaveShift = QLog2(BANK_INTERLEAVE);
    // Bank bits sit above the bank-interleave bits, which sit above the pipe bits.
    const uint32_t swizzle = pipeSwizzle + ((bankSwizzle << bankInterleaveShift) << pipeShift);
    const uint64_t swizzledAddr = baseAddr ^ (swizzle * PIPE_INTERLEAVE_BYTES);
    return static_cast<uint32_t>(swizzledAddr >> 8);
}
/// Computes the base swizzle value for the surface with index surfindex.
///
/// @param outswizzle Receives the swizzle; must not be null. Set to 0 for tile modes that are
///                   not macro tiled.
/// @return GPA_ERR_OK on success, GPA_ERR_INVALID_ARGS for a null output, or the error
///         reported by gpaGetTileInfo.
GpaError gpaComputeBaseSwizzle(uint32_t* outswizzle, GnmTileMode tilemode, uint32_t surfindex,
                               uint32_t bpp, uint32_t numfrags, GnmGpuMode gpumode) {
    if (outswizzle == nullptr) {
        return GPA_ERR_INVALID_ARGS;
    }

    GpaTileInfo info = {};
    const GpaError status = gpaGetTileInfo(&info, tilemode, bpp, numfrags, gpumode);
    if (status != GPA_ERR_OK) {
        return status;
    }

    if (!gpaIsMacroTiled(info.arraymode)) {
        *outswizzle = 0;
        return GPA_ERR_OK;
    }

    // Bank rotation per surface index, one row per GnmNumBanks value. Upstream notes that this
    // table stems from a legacy misreading of the hardware documentation and is kept because
    // it does no harm.
    static const uint8_t kBankRotation[4][16] = {
        {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},       // ADDR_SURF_2_BANK
        {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},       // ADDR_SURF_4_BANK
        {0, 3, 6, 1, 4, 7, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0},       // ADDR_SURF_8_BANK
        {0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9}, // ADDR_SURF_16_BANK
    };

    // GnmNumBanks encodes 2/4/8/16 banks as 0..3.
    const uint32_t bankCount = 2 << info.banks;
    const uint32_t pipeCount = gpaGetPipeCount(info.pipeconfig);
    const uint32_t bankSwizzle = kBankRotation[info.banks][surfindex & (bankCount - 1)];
    // Only 3D macro tiled modes get a pipe swizzle.
    const uint32_t pipeSwizzle =
        ismacrotiled3d(info.arraymode) ? (surfindex & (pipeCount - 1)) : 0;

    *outswizzle = GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, 0, &info);
    return GPA_ERR_OK;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,375 @@
// SPDX-FileCopyrightText: Copyright 2024 freegnm Project
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdbool>
#include "common/types.h"
#include "video_core/amdgpu/gpuaddr/dataformat.h"
/// Sample split setting of a tile mode.
/// Each enumerator's value is log2 of the split count in its name (SPLIT_1 = 0 ... SPLIT_8 = 3).
enum GnmSampleSplit {
GNM_ADDR_SAMPLE_SPLIT_1 = 0x0,
GNM_ADDR_SAMPLE_SPLIT_2 = 0x1,
GNM_ADDR_SAMPLE_SPLIT_4 = 0x2,
GNM_ADDR_SAMPLE_SPLIT_8 = 0x3,
};
/// Micro tiling category of a tile mode (display, thin, depth, rotated or thick).
enum GnmMicroTileMode {
GNM_SURF_DISPLAY_MICRO_TILING = 0x0,
GNM_SURF_THIN_MICRO_TILING = 0x1,
GNM_SURF_DEPTH_MICRO_TILING = 0x2,
GNM_SURF_ROTATED_MICRO_TILING = 0x3,
GNM_SURF_THICK_MICRO_TILING = 0x4,
};
/// Macro tile mode indices, contiguous from 0x0 to 0xf.
/// The *_DUP, *_DUP2 and *_DUP3 enumerators give distinct names to indices whose base name
/// already exists at a lower value.
/// NOTE(review): names presumably read <bank width>x<bank height>_<bank count> -- confirm.
enum GnmMacroTileMode {
GNM_MACROTILEMODE_1x4_16 = 0x0,
GNM_MACROTILEMODE_1x2_16 = 0x1,
GNM_MACROTILEMODE_1x1_16 = 0x2,
GNM_MACROTILEMODE_1x1_16_DUP = 0x3,
GNM_MACROTILEMODE_1x1_8 = 0x4,
GNM_MACROTILEMODE_1x1_4 = 0x5,
GNM_MACROTILEMODE_1x1_2 = 0x6,
GNM_MACROTILEMODE_1x1_2_DUP = 0x7,
GNM_MACROTILEMODE_1x8_16 = 0x8,
GNM_MACROTILEMODE_1x4_16_DUP = 0x9,
GNM_MACROTILEMODE_1x2_16_DUP = 0xa,
GNM_MACROTILEMODE_1x1_16_DUP2 = 0xb,
GNM_MACROTILEMODE_1x1_8_DUP = 0xc,
GNM_MACROTILEMODE_1x1_4_DUP = 0xd,
GNM_MACROTILEMODE_1x1_2_DUP2 = 0xe,
GNM_MACROTILEMODE_1x1_2_DUP3 = 0xf,
};
/// Tile mode indices.
/// Values 0x0..0x1a are contiguous and grouped by prefix (DEPTH, DISPLAY, THIN, THICK).
/// Values 0x1b..0x1e have no enumerator; GNM_TM_DISPLAY_LINEAR_GENERAL is 0x1f.
enum GnmTileMode {
GNM_TM_DEPTH_2D_THIN_64 = 0x0,
GNM_TM_DEPTH_2D_THIN_128 = 0x1,
GNM_TM_DEPTH_2D_THIN_256 = 0x2,
GNM_TM_DEPTH_2D_THIN_512 = 0x3,
GNM_TM_DEPTH_2D_THIN_1K = 0x4,
GNM_TM_DEPTH_1D_THIN = 0x5,
GNM_TM_DEPTH_2D_THIN_PRT_256 = 0x6,
GNM_TM_DEPTH_2D_THIN_PRT_1K = 0x7,
GNM_TM_DISPLAY_LINEAR_ALIGNED = 0x8,
GNM_TM_DISPLAY_1D_THIN = 0x9,
GNM_TM_DISPLAY_2D_THIN = 0xa,
GNM_TM_DISPLAY_THIN_PRT = 0xb,
GNM_TM_DISPLAY_2D_THIN_PRT = 0xc,
GNM_TM_THIN_1D_THIN = 0xd,
GNM_TM_THIN_2D_THIN = 0xe,
GNM_TM_THIN_3D_THIN = 0xf,
GNM_TM_THIN_THIN_PRT = 0x10,
GNM_TM_THIN_2D_THIN_PRT = 0x11,
GNM_TM_THIN_3D_THIN_PRT = 0x12,
GNM_TM_THICK_1D_THICK = 0x13,
GNM_TM_THICK_2D_THICK = 0x14,
GNM_TM_THICK_3D_THICK = 0x15,
GNM_TM_THICK_THICK_PRT = 0x16,
GNM_TM_THICK_2D_THICK_PRT = 0x17,
GNM_TM_THICK_3D_THICK_PRT = 0x18,
GNM_TM_THICK_2D_XTHICK = 0x19,
GNM_TM_THICK_3D_XTHICK = 0x1a,
GNM_TM_DISPLAY_LINEAR_GENERAL = 0x1f,
};
/// Array modes, contiguous from 0x0 to 0xf: two linear modes, two 1D tiled modes, then the
/// 2D/PRT modes and finally the 3D modes.
enum GnmArrayMode {
GNM_ARRAY_LINEAR_GENERAL = 0x0,
GNM_ARRAY_LINEAR_ALIGNED = 0x1,
GNM_ARRAY_1D_TILED_THIN1 = 0x2,
GNM_ARRAY_1D_TILED_THICK = 0x3,
GNM_ARRAY_2D_TILED_THIN1 = 0x4,
GNM_ARRAY_PRT_TILED_THIN1 = 0x5,
GNM_ARRAY_PRT_2D_TILED_THIN1 = 0x6,
GNM_ARRAY_2D_TILED_THICK = 0x7,
GNM_ARRAY_2D_TILED_XTHICK = 0x8,
GNM_ARRAY_PRT_TILED_THICK = 0x9,
GNM_ARRAY_PRT_2D_TILED_THICK = 0xa,
GNM_ARRAY_PRT_3D_TILED_THIN1 = 0xb,
GNM_ARRAY_3D_TILED_THIN1 = 0xc,
GNM_ARRAY_3D_TILED_THICK = 0xd,
GNM_ARRAY_3D_TILED_XTHICK = 0xe,
GNM_ARRAY_PRT_3D_TILED_THICK = 0xf,
};
/// Bank count encoding: value n stands for 2 << n banks (2, 4, 8, 16).
enum GnmNumBanks {
GNM_SURF_2_BANK = 0x0,
GNM_SURF_4_BANK = 0x1,
GNM_SURF_8_BANK = 0x2,
GNM_SURF_16_BANK = 0x3,
};
/// GPU mode selector; decides whether the BASE or the alternate (NEO) tiling tables are used.
enum GnmGpuMode {
GNM_GPU_BASE = 0x0,
GNM_GPU_NEO = 0x1,
};
/// Bank width encoding: each value is log2 of the width in the enumerator name.
enum GnmBankWidth {
GNM_SURF_BANK_WIDTH_1 = 0x0,
GNM_SURF_BANK_WIDTH_2 = 0x1,
GNM_SURF_BANK_WIDTH_4 = 0x2,
GNM_SURF_BANK_WIDTH_8 = 0x3,
};
/// Bank height encoding: each value is log2 of the height in the enumerator name.
enum GnmBankHeight {
GNM_SURF_BANK_HEIGHT_1 = 0x0,
GNM_SURF_BANK_HEIGHT_2 = 0x1,
GNM_SURF_BANK_HEIGHT_4 = 0x2,
GNM_SURF_BANK_HEIGHT_8 = 0x3,
};
/// Pipe configuration identifiers. The values are sparse: 0x1..0x3 and 0xf have no
/// enumerator. The P<n> prefix names the pipe count (see gpaGetPipeCount for the ones in use).
enum GnmPipeConfig {
GNM_ADDR_SURF_P2 = 0x0,
GNM_ADDR_SURF_P4_8x16 = 0x4,
GNM_ADDR_SURF_P4_16x16 = 0x5,
GNM_ADDR_SURF_P4_16x32 = 0x6,
GNM_ADDR_SURF_P4_32x32 = 0x7,
GNM_ADDR_SURF_P8_16x16_8x16 = 0x8,
GNM_ADDR_SURF_P8_16x32_8x16 = 0x9,
GNM_ADDR_SURF_P8_32x32_8x16 = 0xa,
GNM_ADDR_SURF_P8_16x32_16x16 = 0xb,
GNM_ADDR_SURF_P8_32x32_16x16 = 0xc,
GNM_ADDR_SURF_P8_32x32_16x32 = 0xd,
GNM_ADDR_SURF_P8_32x64_32x32 = 0xe,
GNM_ADDR_SURF_P16_32x32_8x16 = 0x10,
GNM_ADDR_SURF_P16_32x32_16x16 = 0x11,
};
/// Macro tile aspect ratio encoding: each value is log2 of the ratio in the enumerator name.
enum GnmMacroTileAspect {
GNM_SURF_MACRO_ASPECT_1 = 0x0,
GNM_SURF_MACRO_ASPECT_2 = 0x1,
GNM_SURF_MACRO_ASPECT_4 = 0x2,
GNM_SURF_MACRO_ASPECT_8 = 0x3,
};
/// Tile split size encoding: value n stands for 64 << n bytes (64 B up to 4 KB).
enum GnmTileSplit {
GNM_SURF_TILE_SPLIT_64B = 0x0,
GNM_SURF_TILE_SPLIT_128B = 0x1,
GNM_SURF_TILE_SPLIT_256B = 0x2,
GNM_SURF_TILE_SPLIT_512B = 0x3,
GNM_SURF_TILE_SPLIT_1KB = 0x4,
GNM_SURF_TILE_SPLIT_2KB = 0x5,
GNM_SURF_TILE_SPLIT_4KB = 0x6,
};
/// Kinds of surface a caller can request properties for: render targets (color, depth,
/// stencil, fmask) and read-only or read-write (RW) textures. Values are implicit, from 0.
enum GpaSurfaceType {
GPA_SURFACE_COLORDISPLAY,
GPA_SURFACE_COLOR,
GPA_SURFACE_DEPTHSTENCIL,
GPA_SURFACE_DEPTH,
GPA_SURFACE_STENCIL,
GPA_SURFACE_FMASK,
GPA_SURFACE_TEXTUREFLAT,
GPA_SURFACE_TEXTUREVOLUME,
GPA_SURFACE_TEXTURECUBEMAP,
GPA_SURFACE_RWTEXTUREFLAT,
GPA_SURFACE_RWTEXTUREVOLUME,
GPA_SURFACE_RWTEXTURECUBEMAP,
};
/// Surface usage flags packed into one 32-bit word: thirteen 1-bit flags followed by 19
/// unused bits. The static_assert below pins the size to 4 bytes.
struct GpaSurfaceFlags {
u32 colortarget : 1;
u32 depthtarget : 1;
u32 stenciltarget : 1;
u32 texture : 1;
u32 cube : 1;
u32 volume : 1;
u32 fmask : 1;
u32 cubeasarray : 1;
u32 overlay : 1;
u32 display : 1;
u32 prt : 1;
u32 pow2pad : 1;
u32 texcompatible : 1;
u32 _unused : 19;
};
static_assert(sizeof(GpaSurfaceFlags) == 0x4, "");
/// Tile mode and usage flags chosen for a surface.
struct GpaSurfaceProperties {
GnmTileMode tilemode;
GpaSurfaceFlags flags;
};
/// Parameter bundle for HTILE computations.
/// NOTE(review): presumably the input describing the depth surface whose HTILE layout is being
/// computed -- the consumer is not visible here; confirm.
struct GpaHtileParams {
u32 pitch;
u32 height;
u32 numslices;
u32 numfrags;
u32 bpp;
GnmArrayMode arraymode;
GnmNumBanks banks;
GnmPipeConfig pipeconfig;
GnmGpuMode mingpumode;
// Single-bit option; the remaining 31 bits are reserved.
struct {
u32 tccompatible : 1;
u32 reserved : 31;
} flags;
};
/// Parameter bundle for CMASK computations.
/// NOTE(review): presumably the input describing the color surface whose CMASK layout is being
/// computed -- the consumer is not visible here; confirm.
struct GpaCmaskParams {
u32 pitch;
u32 height;
u32 numslices;
u32 numfrags;
u32 bpp;
GnmTileMode tilemode;
GnmGpuMode mingpumode;
// Single-bit option; the remaining 31 bits are reserved.
struct {
u32 tccompatible : 1;
u32 reserved : 31;
} flags;
};
/// Parameter bundle for FMASK computations.
/// NOTE(review): presumably the input describing the surface whose FMASK layout is being
/// computed -- the consumer is not visible here; confirm.
struct GpaFmaskParams {
u32 pitch;
u32 height;
u32 numslices;
u32 numfrags;
u32 bpp;
GnmTileMode tilemode;
GnmGpuMode mingpumode;
bool isblockcompressed;
};
/// Tiling parameters resolved for one tile mode.
/// Keep the member order stable: code that fills this struct with designated initialisers
/// must list the members in declaration order.
struct GpaTileInfo {
GnmArrayMode arraymode;
GnmNumBanks banks;
GnmBankWidth bankwidth;
GnmBankHeight bankheight;
GnmMacroTileAspect macroaspectratio;
GnmTileSplit tilesplit;
GnmPipeConfig pipeconfig;
};
/// Computed layout of a surface: dimensions, total size, alignment requirements, element and
/// block sizes, plus the tile mode and tile info that apply.
/// NOTE(review): units (elements vs. pixels vs. bytes) are not visible here -- confirm against
/// the code that fills this struct.
struct GpaSurfaceInfo {
u32 pitch;
u32 height;
u32 depth;
uint64_t surfacesize;
u32 basealign;
u32 pitchalign;
u32 heightalign;
u32 depthalign;
u32 bitsperelem;
u32 blockwidth;
u32 blockheight;
GnmTileMode tilemode;
GpaTileInfo tileinfo;
// Anonymous struct: istexcompatible is accessed directly on GpaSurfaceInfo.
struct {
u32 istexcompatible : 1;
u32 _unused : 31;
};
};
// Result record for HTILE queries (going by the name; the producer is not
// visible in this part of the file - confirm).
// Holds dimensions, base alignment, bits per pixel and macro tile size as
// 32-bit values, and the total and per-slice byte counts as 64-bit values.
struct GpaHtileInfo {
u32 pitch;
u32 height;
u32 basealign;
u32 bpp;
u32 macrowidth;
u32 macroheight;
uint64_t htilebytes;
uint64_t slicebytes;
};
// Result record for CMASK queries (going by the name; the producer is not
// visible in this part of the file - confirm).
// Same layout as GpaHtileInfo with an additional blockmax field before the
// 64-bit total and per-slice byte counts.
struct GpaCmaskInfo {
u32 pitch;
u32 height;
u32 basealign;
u32 bpp;
u32 macrowidth;
u32 macroheight;
u32 blockmax;
uint64_t cmaskbytes;
uint64_t slicebytes;
};
// Result record for FMASK queries (going by the name; the producer is not
// visible in this part of the file - confirm).
// Reports dimensions, base/pitch/height alignment and bits per pixel as
// 32-bit values, and the total and per-slice byte counts as 64-bit values.
struct GpaFmaskInfo {
u32 pitch;
u32 height;
u32 basealign;
u32 pitchalign;
u32 heightalign;
u32 bpp;
uint64_t fmaskbytes;
uint64_t slicebytes;
};
// Selects one element within a surface by array index, face, mip level,
// depth slice, fragment and sample. All six coordinates are plain u32 values.
struct GpaSurfaceIndex {
u32 arrayindex;
u32 face;
u32 mip;
u32 depth;
u32 fragment;
u32 sample;
};
// Input to the surface computation routines. Callers in this code base
// initialise it with gpaTpInit() from a GpaTextureInfo, then overwrite
// linearwidth/linearheight/lineardepth and miplevel for each mip level
// before passing it to gpaComputeSurfaceInfo().
struct GpaTilingParams {
GnmTileMode tilemode;
GnmGpuMode mingpumode;
// Untiled extent of the level being described.
u32 linearwidth;
u32 linearheight;
u32 lineardepth;
u32 numfragsperpixel;
u32 basetiledpitch;
u32 miplevel;
u32 arrayslice;
GpaSurfaceFlags surfaceflags;
u32 bitsperfrag;
bool isblockcompressed;
};
// Axis-aligned box given by six u32 bounds. left/top/front are the
// negative-side bounds and right/bottom/back the positive-side bounds on
// X/Y/Z respectively, as the per-field comments indicate.
// Whether the positive-side bounds are inclusive is not visible here.
struct GpaSurfaceRegion {
u32 left; // -X
u32 top; // -Y
u32 front; // -Z
u32 right; // +X
u32 bottom; // +Y
u32 back; // +Z
};
// Texture dimensionality / layout selector. Values are consecutive from
// 0x08 through 0x0F.
enum GnmTextureType {
    GNM_TEXTURE_1D = 0x08,
    GNM_TEXTURE_2D = 0x09,
    GNM_TEXTURE_3D = 0x0A,
    GNM_TEXTURE_CUBEMAP = 0x0B,
    GNM_TEXTURE_1D_ARRAY = 0x0C,
    GNM_TEXTURE_2D_ARRAY = 0x0D,
    GNM_TEXTURE_2D_MSAA = 0x0E,
    GNM_TEXTURE_2D_ARRAY_MSAA = 0x0F,
};
// Description of a whole texture (type, format, extent, mip/slice counts and
// tiling) handed to the gpa* routines such as gpaTpInit(),
// gpaCalcSurfaceSizeOffset() and gpaDetileTextureAll().
// The texture cache builds one from an AmdGpu::Image descriptor, storing
// width/height as the descriptor value plus one and taking nummips/numslices
// from the descriptor's NumLevels()/NumLayers().
struct GpaTextureInfo {
GnmTextureType type;
GnmDataFormat fmt;
u32 width;
u32 height;
u32 pitch;
u32 depth;
u32 numfrags;
u32 nummips;
u32 numslices;
// Tile mode.
GnmTileMode tm;
GnmGpuMode mingpumode;
bool pow2pad;
};

View File

@ -10,11 +10,9 @@
#include "video_core/amdgpu/pixel_format.h"
#include <array>
#include <condition_variable>
#include <coroutine>
#include <functional>
#include <future>
#include <span>
#include <mutex>
#include <thread>
#include <queue>
@ -333,6 +331,14 @@ struct Liverpool {
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
u64 Address() const {
    // Widen the register value to 64 bits before shifting so the upper bits
    // of the resulting address are not truncated.
    const u64 read_base = static_cast<u64>(z_read_base);
    return read_base << 8;
}
// Returns depth_slice.tile_max multiplied by 8 as the size of the buffer.
// NOTE(review): Height() above adds 1 to its *_tile_max field before scaling,
// while this uses tile_max without the +1 and has no bytes-per-element term -
// confirm this yields the intended byte size.
[[nodiscard]] size_t GetSizeAligned() const {
return depth_slice.tile_max * 8;
}
};
enum class ClipSpace : u32 {
@ -564,6 +570,7 @@ struct Liverpool {
Subtract = 1,
Min = 2,
Max = 3,
ReverseSubtract = 4,
};
BitField<0, 5, BlendFactor> color_src_factor;
@ -612,7 +619,7 @@ struct Liverpool {
BitField<0, 2, EndianSwap> endian;
BitField<2, 5, DataFormat> format;
BitField<7, 1, u32> linear_general;
BitField<8, 2, NumberFormat> number_type;
BitField<8, 3, NumberFormat> number_type;
BitField<11, 2, SwapMode> comp_swap;
BitField<13, 1, u32> fast_clear;
BitField<14, 1, u32> compression;
@ -680,7 +687,7 @@ struct Liverpool {
NumberFormat NumFormat() const {
// There is a small difference between T# and CB number types, account for it.
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb
: info.number_type;
}
};

View File

@ -7,6 +7,7 @@
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/gpuaddr/gpuaddr.h"
namespace AmdGpu {
@ -132,10 +133,21 @@ struct Image {
}
u32 NumLayers() const {
return last_array - base_array + 1;
u32 slices = type == ImageType::Color3D ? 1 : depth.Value() + 1;
if (type == ImageType::Cube) {
slices *= 6;
}
if (pow2pad) {
slices = std::bit_ceil(slices);
}
return slices;
}
// Number of mip levels. The two MSAA image types always report a single
// level; every other type reports last_level + 1.
u32 NumLevels() const {
    const bool is_msaa =
        type == ImageType::Color2DMsaa || type == ImageType::Color2DMsaaArray;
    if (!is_msaa) {
        return last_level + 1;
    }
    return 1;
}
@ -155,9 +167,29 @@ struct Image {
return GetTilingMode() != TilingMode::Display_Linear;
}
[[nodiscard]] size_t GetSizeAligned() const {
// TODO: Derive this properly from tiling params
return (width + 1) * (height + 1) * NumComponents(GetDataFmt());
[[nodiscard]] size_t GetSizeAligned(const GpaTextureInfo& texinfo) const {
GpaTilingParams tp = {};
GpaError err = gpaTpInit(&tp, &texinfo, 0, 0);
ASSERT(err == GPA_ERR_OK);
GpaSurfaceInfo surfinfo = {};
size_t size = {};
for (uint32_t i = 0; i < NumLevels(); i += 1) {
tp.linearwidth = std::max(texinfo.width >> i, 1U);
tp.linearheight = std::max(texinfo.height >> i, 1U);
tp.lineardepth = std::max(texinfo.depth >> i, 1U);
tp.miplevel = i;
err = gpaComputeSurfaceInfo(&surfinfo, &tp);
ASSERT(err == GPA_ERR_OK);
size += NumLayers() * surfinfo.surfacesize;
if (tp.linearwidth == 1 && tp.linearheight == 1 &&
tp.lineardepth == 1) {
break;
}
}
return size;
}
};

View File

@ -176,6 +176,8 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
return vk::BlendOp::eMin;
case BlendFunc::Max:
return vk::BlendOp::eMax;
case BlendFunc::ReverseSubtract:
return vk::BlendOp::eReverseSubtract;
default:
UNREACHABLE();
}
@ -289,7 +291,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eB8G8R8A8Srgb;
return vk::Format::eR8G8B8A8Srgb;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
@ -316,7 +318,23 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc7SrgbBlock;
}
UNREACHABLE();
if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc1RgbaUnormBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc3UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR8G8B8A8Uint;
}
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32Sfloat;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {

View File

@ -63,8 +63,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pVertexAttributeDescriptions = attributes.data(),
};
ASSERT_MSG(key.prim_type != Liverpool::PrimitiveType::RectList || IsEmbeddedVs(),
if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) {
LOG_WARNING(Render_Vulkan,
"Rectangle List primitive type is only supported for embedded VS");
}
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = LiverpoolToVK::PrimitiveType(key.prim_type),

View File

@ -41,9 +41,11 @@ Instance::Instance(bool enable_validation, bool dump_command_buffers)
physical_devices{instance->enumeratePhysicalDevices()} {}
Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index)
: instance{CreateInstance(dl, window.getWindowInfo().type, true, false)},
debug_callback{CreateDebugCallback(*instance)},
: enable_validation{false}, instance{CreateInstance(dl, window.getWindowInfo().type, enable_validation, false)},
physical_devices{instance->enumeratePhysicalDevices()} {
if (enable_validation) {
debug_callback = CreateDebugCallback(*instance);
}
const std::size_t num_physical_devices = static_cast<u16>(physical_devices.size());
ASSERT_MSG(num_physical_devices > 0, "No physical devices found");

View File

@ -194,6 +194,7 @@ private:
private:
vk::DynamicLoader dl;
bool enable_validation{};
vk::UniqueInstance instance;
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;

View File

@ -112,6 +112,8 @@ void PipelineCache::RefreshGraphicsKey() {
key.color_formats[remapped_cb] =
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
key.blend_controls[remapped_cb] = regs.blend_control[cb];
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
!col_buf.info.blend_bypass);
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
++remapped_cb;
@ -160,6 +162,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
inst_pool.ReleaseContents();
// Recompile shader to IR.
LOG_INFO(Render_Vulkan, "Compiling shader {:#x}", hash);
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

View File

@ -140,10 +140,7 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
// Sanitize extension list
std::erase_if(extensions, [&](const char* extension) -> bool {

View File

@ -41,6 +41,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
color_attachments{};
vk::RenderingAttachmentInfo depth_attachment{};
u32 num_depth_attachments{};
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
const auto& col_buf = regs.color_buffers[col_buf_id];
if (!col_buf) {
@ -57,6 +59,17 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
.storeOp = vk::AttachmentStoreOp::eStore,
});
}
if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) {
const auto& image_view =
texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
.loadOp = vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
};
num_depth_attachments++;
}
// TODO: Don't restart renderpass every draw
const auto& scissor = regs.screen_scissor;
@ -69,6 +82,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
.layerCount = 1,
.colorAttachmentCount = static_cast<u32>(color_attachments.size()),
.pColorAttachments = color_attachments.data(),
.pDepthAttachment = num_depth_attachments ? &depth_attachment : nullptr,
};
UpdateDynamicState(*pipeline);
@ -78,7 +92,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
const u32 num_vertices = pipeline->IsEmbeddedVs() ? 4 : regs.num_indices;
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList
? 4
: regs.num_indices;
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering();
@ -162,7 +178,7 @@ void Rasterizer::UpdateViewportScissorState() {
.y = regs.viewports[0].yoffset - regs.viewports[0].yscale,
.width = regs.viewports[0].xscale * 2.0f,
.height = regs.viewports[0].yscale * 2.0f,
.minDepth = regs.viewports[0].zoffset - regs.viewports[0].zscale,
.minDepth = /*regs.viewports[0].zoffset - regs.viewports[0].zscale*/ 0.f,
.maxDepth = regs.viewports[0].zscale + regs.viewports[0].zoffset,
};
const vk::Rect2D scissor{

View File

@ -10,6 +10,7 @@
#include "video_core/texture_cache/tile_manager.h"
#include <vk_mem_alloc.h>
#include <vulkan/vulkan_format_traits.hpp>
namespace VideoCore {
@ -37,10 +38,11 @@ static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eSampled;
if (false /*&& IsDepthStencilFormat(format)*/) {
if (format == vk::Format::eD32SfloatS8Uint || format == vk::Format::eD32Sfloat) {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
if (format != vk::Format::eBc3SrgbBlock) {
if (format != vk::Format::eBc3SrgbBlock && format != vk::Format::eBc3UnormBlock &&
format != vk::Format::eBc1RgbaUnormBlock) {
usage |= vk::ImageUsageFlagBits::eColorAttachment;
}
}
@ -53,10 +55,10 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
return vk::ImageType::e1D;
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Color1DArray:
case AmdGpu::ImageType::Color2DArray:
case AmdGpu::ImageType::Cube:
return vk::ImageType::e2D;
case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Color2DArray:
return vk::ImageType::e3D;
default:
UNREACHABLE();
@ -97,6 +99,18 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
guest_size_bytes = buffer.GetSizeAligned();
}
// Builds the image description for a depth buffer: an untiled 2D image with
// a single depth slice whose format comes from the Z and stencil formats.
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
                     const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
    is_tiled = false;
    pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
    type = vk::ImageType::e2D;

    // Use the extent hint when it is valid, otherwise the buffer's own
    // pitch and height.
    if (hint.Valid()) {
        size.width = hint.width;
        size.height = hint.height;
    } else {
        size.width = buffer.Pitch();
        size.height = buffer.Height();
    }
    size.depth = 1;

    pitch = size.width;
    guest_size_bytes = buffer.GetSizeAligned();
}
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
is_tiled = image.IsTiled();
tiling_mode = image.GetTilingMode();
@ -108,7 +122,28 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
pitch = image.Pitch();
resources.levels = image.NumLevels();
resources.layers = image.NumLayers();
guest_size_bytes = image.GetSizeAligned();
texinfo = GpaTextureInfo{
.type = static_cast<GnmTextureType>(image.type.Value()),
.fmt = {
.surfacefmt = static_cast<GnmImageFormat>(image.data_format.Value()),
.chantype = static_cast<GnmImgNumFormat>(image.num_format.Value()),
.chanx = static_cast<GnmChannel>(image.dst_sel_x.Value()),
.chany = static_cast<GnmChannel>(image.dst_sel_y.Value()),
.chanz = static_cast<GnmChannel>(image.dst_sel_z.Value()),
.chanw = static_cast<GnmChannel>(image.dst_sel_w.Value()),
},
.width = static_cast<u32>(image.width.Value() + 1),
.height = static_cast<u32>(image.height.Value() + 1),
.pitch = image.Pitch(),
.depth = 1,
.numfrags = 1,
.nummips = image.NumLevels(),
.numslices = image.NumLayers(),
.tm = static_cast<GnmTileMode>(image.tiling_index.Value()),
.mingpumode = GNM_GPU_BASE,
.pow2pad = bool(image.pow2pad.Value()),
};
guest_size_bytes = image.GetSizeAligned(texinfo);
}
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
@ -152,16 +187,13 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
if (info.type == vk::ImageType::e3D) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}
if (info.is_tiled) {
flags |= vk::ImageCreateFlagBits::eExtendedUsage;
if (false) { // IsBlockCodedFormat()
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
}
}
info.usage = ImageUsageFlags(info.pixel_format);
if (info.is_tiled || info.is_storage) {
info.usage |= vk::ImageUsageFlagBits::eStorage;
if (info.pixel_format == vk::Format::eD32Sfloat) {
aspect_mask = vk::ImageAspectFlagBits::eDepth;
}
if (info.pixel_format == vk::Format::eD32SfloatS8Uint) {
aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
}
const vk::ImageCreateInfo image_ci = {
@ -187,7 +219,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
ImageViewInfo view_info;
view_info.format = DemoteImageFormatForDetiling(info.pixel_format);
view_info.used_for_detiling = true;
view_for_detiler.emplace(*instance, view_info, image);
view_for_detiler.emplace(*instance, view_info, *this);
}
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
@ -198,7 +230,8 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
return;
}
const vk::ImageMemoryBarrier barrier = {.srcAccessMask = access_mask,
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = access_mask,
.dstAccessMask = dst_mask,
.oldLayout = layout,
.newLayout = dst_layout,
@ -211,10 +244,11 @@ void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> ds
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
}};
},
};
// Adjust pipeline stage
vk::PipelineStageFlagBits dst_pl_stage = (dst_mask == vk::AccessFlagBits::eTransferRead ||
const vk::PipelineStageFlagBits dst_pl_stage = (dst_mask == vk::AccessFlagBits::eTransferRead ||
dst_mask == vk::AccessFlagBits::eTransferWrite)
? vk::PipelineStageFlagBits::eTransfer
: vk::PipelineStageFlagBits::eAllGraphics;

View File

@ -8,6 +8,7 @@
#include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/gpuaddr/gpuaddr.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h"
@ -38,6 +39,8 @@ struct ImageInfo {
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
bool is_tiled = false;
@ -50,6 +53,7 @@ struct ImageInfo {
u32 pitch = 0;
u32 guest_size_bytes = 0;
AmdGpu::TilingMode tiling_mode{AmdGpu::TilingMode::Display_Linear};
GpaTextureInfo texinfo{};
};
struct UniqueImage {

View File

@ -3,6 +3,7 @@
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/image_view.h"
namespace VideoCore {
@ -58,7 +59,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept {
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
}
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, vk::Image image,
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
: info{info_} {
vk::ImageViewUsageCreateInfo usage_ci{};
@ -68,14 +69,14 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
const vk::ImageViewCreateInfo image_view_ci = {
.pNext = usage_override.has_value() ? &usage_ci : nullptr,
.image = image,
.image = image.image,
.viewType = info.type,
.format = info.format,
.components = info.mapping,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.aspectMask = image.aspect_mask,
.baseMipLevel = 0U,
.levelCount = 1,
.levelCount = 1u,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},

View File

@ -29,8 +29,10 @@ struct ImageViewInfo {
auto operator<=>(const ImageViewInfo&) const = default;
};
struct Image;
struct ImageView {
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, vk::Image image,
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
std::optional<vk::ImageUsageFlags> usage_override = {});
~ImageView();

View File

@ -4,6 +4,7 @@
#include <xxhash.h>
#include "common/assert.h"
#include "common/config.h"
#include "common/error.h"
#include "core/virtual_memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -23,7 +24,7 @@
void mprotect(void* addr, size_t len, int prot) {
DWORD old_prot{};
BOOL result = VirtualProtect(addr, len, prot, &old_prot);
ASSERT_MSG(result != 0, "Region protection failed");
ASSERT_MSG(result != 0, "Region protection failed {}", Common::GetLastErrorMsg());
}
#endif
@ -93,7 +94,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
ASSERT(null_id.index == 0);
ImageViewInfo view_info;
void(slot_image_views.insert(instance, view_info, slot_images[null_id].image));
void(slot_image_views.insert(instance, view_info, slot_images[null_id]));
}
TextureCache::~TextureCache() {
@ -112,7 +113,7 @@ void TextureCache::OnCpuWrite(VAddr address) {
});
}
Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) {
std::unique_lock lock{m_page_table};
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
@ -132,7 +133,7 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
}
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CpuModified)) {
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
RefreshImage(image);
TrackImage(image, image_id);
}
@ -153,8 +154,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
usage_override = image.info.usage & ~vk::ImageUsageFlagBits::eStorage;
}
const ImageViewId view_id =
slot_image_views.insert(instance, view_info, image.image, usage_override);
const ImageViewId view_id = slot_image_views.insert(instance, view_info, image, usage_override);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];
@ -170,7 +170,19 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
auto& image = FindImage(info, buffer.Address());
auto& image = FindImage(info, buffer.Address(), false);
image.flags &= ~ImageFlagBits::CpuModified;
ImageViewInfo view_info;
view_info.format = info.pixel_format;
return RegisterImageView(image, view_info);
}
ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
auto& image = FindImage(info, buffer.Address(), false);
image.flags &= ~ImageFlagBits::CpuModified;
ImageViewInfo view_info;
view_info.format = info.pixel_format;
@ -181,81 +193,58 @@ void TextureCache::RefreshImage(Image& image) {
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
{
if (!tile_manager.TryDetile(image)) {
// Upload data to the staging buffer.
const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
std::memcpy(data, image_data, image.info.guest_size_bytes);
const auto [staging_data, offset, _] = staging.Map(image.info.guest_size_bytes, 16);
if (image.info.texinfo.tm == GnmTileMode::GNM_TM_DISPLAY_LINEAR_GENERAL) {
std::memcpy(staging_data, image_data, image.info.guest_size_bytes);
} else {
const GpaError res = gpaDetileTextureAll(image_data, image.info.guest_size_bytes, staging_data,
image.info.guest_size_bytes, &image.info.texinfo);
ASSERT_MSG(res == GPA_ERR_OK, "Texture detiling failed with error: {}", gpaStrError(res));
}
staging.Commit(image.info.guest_size_bytes);
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
// The mipmaps of each slice are next to each other in memory. So we iterate each layer
// and detile its mipmaps. Vulkan allows us to copy to the same mipmap of multiple layers at
// once, so we try to upload in that order.
boost::container::small_vector<vk::BufferImageCopy, 50> image_copies;
for (u32 mip = 0; mip < image.info.resources.levels; mip++) {
// Initialize tiling parameters.
GpaTilingParams tp = {};
GpaError res = gpaTpInit(&tp, &image.info.texinfo, mip, 0);
ASSERT(res == GPA_ERR_OK);
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
// Figure out the offset of the slice0 mip in the image data and its size.
u64 surfoffset = 0;
u64 surfsize = 0;
res = gpaCalcSurfaceSizeOffset(&surfsize, &surfoffset, &image.info.texinfo, mip, 0);
ASSERT(res == GPA_ERR_OK);
// Add a new buffer copy for later.
image_copies.push_back({
.bufferOffset = offset + surfoffset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = {0, 0, 0},
.imageExtent = {image.info.size.width, image.info.size.height, 1},
};
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
vk::ImageLayout::eTransferDstOptimal, image_copy);
}
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
return;
}
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
for (u32 m = 0; m < image.info.resources.levels; m++) {
const u32 width = image.info.size.width >> m;
const u32 height = image.info.size.height >> m;
const u32 map_size = width * height * image.info.resources.layers;
// Upload data to the staging buffer.
const auto [data, offset, _] = staging.Map(map_size, 16);
if (image.info.is_tiled) {
ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode());
} else {
std::memcpy(data, image_data, map_size);
}
staging.Commit(map_size);
image_data += map_size;
// Copy to the image.
const vk::BufferImageCopy image_copy = {
.bufferOffset = offset,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = m,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = u32(image.info.resources.layers),
},
.imageOffset = {0, 0, 0},
.imageExtent = {width, height, 1},
};
.imageExtent = {image.info.size.width >> mip, image.info.size.height >> mip, 1},
});
}
// Perform copy.
const auto cmdbuf = scheduler.CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
cmdbuf.copyBufferToImage(staging.Handle(), image.image,
vk::ImageLayout::eTransferDstOptimal, image_copy);
vk::ImageLayout::eTransferDstOptimal, image_copies);
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
}
}
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
@ -331,7 +320,7 @@ void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
const u32 interval_size = interval_end_addr - interval_start_addr;
void* addr = reinterpret_cast<void*>(interval_start_addr);
if (delta > 0 && count == delta) {
mprotect(addr, interval_size, PAGE_READONLY);
//mprotect(addr, interval_size, PAGE_READONLY);
} else if (delta < 0 && count == -delta) {
mprotect(addr, interval_size, PAGE_READWRITE);
} else {

View File

@ -37,7 +37,8 @@ public:
void OnCpuWrite(VAddr address);
/// Retrieves the image handle of the image with the provided attributes and address.
[[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address);
[[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address,
bool refresh_on_create = true);
/// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image);
@ -45,6 +46,8 @@ public:
/// Retrieves the render target with specified properties
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint);
[[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint);
/// Reuploads image contents.
void RefreshImage(Image& image);

View File

@ -304,7 +304,7 @@ bool TileManager::TryDetile(Image& image) {
return false;
}
const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 64);
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
std::memcpy(data, image_data, image.info.guest_size_bytes);
staging.Commit(image.info.guest_size_bytes);