video_core: Various gpu fixes

This commit is contained in:
raphaelthegreat 2024-06-06 19:08:40 +03:00
parent 5aa3a4d4a0
commit 6cec16225d
31 changed files with 521 additions and 47 deletions

View File

@ -184,8 +184,8 @@ void IOFile::Open(const fs::path& path, FileAccessMode mode, FileType type, File
if (!IsOpen()) {
const auto ec = std::error_code{errno, std::generic_category()};
LOG_ERROR(Common_Filesystem, "Failed to open the file at path={}, ec_message={}",
PathToUTF8String(file_path), ec.message());
//LOG_ERROR(Common_Filesystem, "Failed to open the file at path={}, ec_message={}",
// PathToUTF8String(file_path), ec.message());
}
}

View File

@ -189,7 +189,6 @@ public:
} else {
ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
}
std::fflush(stdout);
}
private:

View File

@ -12,7 +12,7 @@
namespace Libraries::Kernel {
int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
//LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
@ -170,7 +170,7 @@ int PS4_SYSV_ABI sceKernelMkdir(const char* path, u16 mode) {
}
int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
//LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
std::string path_name = mnt->GetHostFile(path);
memset(sb, 0, sizeof(OrbisKernelStat));
@ -198,7 +198,18 @@ int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
int PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) {
int result = sceKernelStat(path, sb);
if (result < 0) {
UNREACHABLE(); // TODO
return result;
//UNREACHABLE(); // TODO
}
return ORBIS_OK;
}
int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) {
//LOG_INFO(Lib_Kernel, "path = {}", path);
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
std::string path_name = mnt->GetHostFile(path);
if (!std::filesystem::exists(path_name)) {
return SCE_KERNEL_ERROR_ENOENT;
}
return ORBIS_OK;
}
@ -216,6 +227,7 @@ void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("1-LFLmRFxxM", "libkernel", 1, "libkernel", 1, 1, sceKernelMkdir);
LIB_FUNCTION("eV9wAD2riIA", "libkernel", 1, "libkernel", 1, 1, sceKernelStat);
LIB_FUNCTION("E6ao34wPw+U", "libScePosix", 1, "libkernel", 1, 1, posix_stat);
LIB_FUNCTION("uWyW3v98sU4", "libkernel", 1, "libkernel", 1, 1, sceKernelCheckReachability);
// openOrbis (to check if it is valid out of OpenOrbis
LIB_FUNCTION("6c3rCVE-fTU", "libkernel", 1, "libkernel", 1, 1,

View File

@ -3,6 +3,7 @@
#include <mutex>
#include <thread>
#include <semaphore.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/singleton.h"
@ -392,7 +393,7 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut
int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr);
if (name != nullptr) {
LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
//LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
}
switch (result) {
@ -417,7 +418,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) {
int result = pthread_mutex_destroy(&(*mutex)->pth_mutex);
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
//LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
delete *mutex;
*mutex = nullptr;
@ -1036,6 +1037,207 @@ void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
return linker->TlsGetAddr(index->ti_module, index->ti_offset);
}
int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
if (cond == nullptr) {
return SCE_KERNEL_ERROR_EINVAL;
}
int result = pthread_cond_destroy(&(*cond)->cond);
LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
switch (result) {
case 0:
return SCE_OK;
case EBUSY:
return SCE_KERNEL_ERROR_EBUSY;
default:
return SCE_KERNEL_ERROR_EINVAL;
}
}
int PS4_SYSV_ABI posix_pthread_condattr_init(ScePthreadCondattr* attr) {
int result = scePthreadCondattrInit(attr);
LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondattrInit: result = {}", result);
if (result < 0) {
UNREACHABLE();
}
return result;
}
int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadCondattr* attr) {
int result = scePthreadCondInit(cond, attr, "");
LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondInit: result = {}", result);
if (result < 0) {
UNREACHABLE();
}
return result;
}
int PS4_SYSV_ABI posix_pthread_condattr_destroy(ScePthreadCondattr* attr) {
int result = scePthreadCondattrDestroy(attr);
LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondattrDestroy: result = {}", result);
if (result < 0) {
UNREACHABLE();
}
return result;
}
int PS4_SYSV_ABI posix_pthread_attr_init(ScePthreadAttr* attr) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
int result = scePthreadAttrInit(attr);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_attr_setstacksize(ScePthreadAttr* attr, size_t stacksize) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
int result = scePthreadAttrSetstacksize(attr, stacksize);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int detachstate) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
int result = scePthreadAttrSetdetachstate(attr, detachstate);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
int result = scePthreadMutexattrInit(attr);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit");
int result = scePthreadMutexattrSettype(attr, type);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_mutexattr_destroy(ScePthreadMutexattr* attr) {
int result = scePthreadMutexattrDestroy(attr);
if (result < 0) {
UNREACHABLE();
}
return result;
}
int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) {
int result = scePthreadMutexattrSetprotocol(attr, protocol);
LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", result);
if (result < 0) {
UNREACHABLE();
}
return result;
}
int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_t* size) {
int result = pthread_attr_getstack(&(*attr)->pth_attr, addr, size);
LOG_INFO(Kernel_Pthread, "scePthreadAttrGetstack: result = {}", result);
if (result == 0) {
return SCE_OK;
}
return SCE_KERNEL_ERROR_EINVAL;
}
int PS4_SYSV_ABI posix_pthread_attr_destroy(ScePthreadAttr* attr) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutexattr_init redirect to scePthreadMutexattrInit");
int result = scePthreadAttrDestroy(attr);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_create(ScePthread* thread, const ScePthreadAttr* attr,
pthreadEntryFunc start_routine, void* arg) {
LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate");
int result = scePthreadCreate(thread, attr, start_routine, arg, "PS4_Thread");
if (result != 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_detach(ScePthread thread) {
LOG_INFO(Kernel_Pthread, "thread detach name = {}", thread->name);
thread->is_detached = true;
return ORBIS_OK;
}
int PS4_SYSV_ABI posix_sem_init(sem_t* sem, int pshared, unsigned int value) {
return sem_init(sem, pshared, value);
}
int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) {
return sem_wait(sem);
}
int PS4_SYSV_ABI posix_sem_post(sem_t* sem) {
return sem_post(sem);
}
int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) {
// LOG_INFO(Kernel_Pthread, "posix pthread_mutex_init redirect to scePthreadMutexInit");
int result = scePthreadMutexDestroy(mutex);
if (result < 0) {
int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
? result + -SCE_KERNEL_ERROR_UNKNOWN
: POSIX_EOTHER;
return rt;
}
return result;
}
int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** value_ptr) {
return pthread_join(thread->pth, value_ptr);
}
int PS4_SYSV_ABI posix_pthread_mutex_trylock(ScePthreadMutex* mutex) {
int result = scePthreadMutexTrylock(mutex);
if (result < 0) {
UNREACHABLE();
}
return result;
}
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy);
LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate);
@ -1048,6 +1250,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSelf);
LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, pthread_self);
LIB_FUNCTION("EotR8a3ASf4", "libScePosix", 1, "libkernel", 1, 1, pthread_self);
LIB_FUNCTION("3qxgM4ezETA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetaffinity);
LIB_FUNCTION("8+s5BzZjxSg", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetaffinity);
LIB_FUNCTION("x1X76arYMxU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGet);
@ -1091,6 +1294,36 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("7H0iTOciTLo", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
LIB_FUNCTION("2Z+PpY6CaJg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
LIB_FUNCTION("mkx2fVhNMsg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack);
LIB_FUNCTION("+U1R4WtXvoc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_detach);
LIB_FUNCTION("g+PZd2hiacg", "libkernel", 1, "libkernel", 1, 1, scePthreadCondDestroy);
// posix calls
LIB_FUNCTION("wtkt-teR1so", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_init);
LIB_FUNCTION("2Q0z6rnBrTE", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_attr_setstacksize);
LIB_FUNCTION("E+tyo3lp5Lw", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_attr_setdetachstate);
LIB_FUNCTION("OxhIB8LB-PQ", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create);
LIB_FUNCTION("zHchY8ft5pk", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_destroy);
LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
LIB_FUNCTION("mKoTx03HRWA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_condattr_init);
LIB_FUNCTION("dJcuQVn6-Iw", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_condattr_destroy);
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_mutexattr_settype);
LIB_FUNCTION("5txKfcMUAok", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_mutexattr_setprotocol);
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("K-jXhbt2gn4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_trylock);
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init);
LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait);
LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post);
LIB_FUNCTION("HF7lK46xzjY", "libScePosix", 1, "libkernel", 1, 1,
posix_pthread_mutexattr_destroy);
}
} // namespace Libraries::Kernel

View File

@ -1,10 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
#include "common/native_clock.h"
#include "core/libraries/kernel/time_management.h"
#include "core/libraries/libs.h"
#ifdef _WIN64
#include <windows.h>
#endif
namespace Libraries::Kernel {
static u64 initial_ptc;
@ -30,6 +35,96 @@ u64 PS4_SYSV_ABI sceKernelReadTsc() {
return clock->GetUptime();
}
int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) {
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
return 0;
}
int PS4_SYSV_ABI posix_usleep(u32 microseconds) {
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
return 0;
}
u32 PS4_SYSV_ABI sceKernelSleep(u32 seconds) {
std::this_thread::sleep_for(std::chrono::seconds(seconds));
return 0;
}
#define FILETIME_1970 116444736000000000ull /* seconds between 1/1/1601 and 1/1/1970 */
#define HECTONANOSEC_PER_SEC 10000000ull
struct SceKernelTimeval {
time_t tv_sec;
s64 tv_usec;
};
struct timezone {
int tz_minuteswest; /* minutes W of Greenwich */
int tz_dsttime; /* type of dst correction */
};
struct timeval {
long tv_sec;
long tv_usec;
};
int PS4_SYSV_ABI getntptimeofday(struct timespec* tp, struct timezone* z) {
int res = 0;
union {
unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */
FILETIME ft;
} _now;
TIME_ZONE_INFORMATION TimeZoneInformation;
DWORD tzi;
if (z != NULL) {
if ((tzi = GetTimeZoneInformation(&TimeZoneInformation)) != TIME_ZONE_ID_INVALID) {
z->tz_minuteswest = TimeZoneInformation.Bias;
if (tzi == TIME_ZONE_ID_DAYLIGHT)
z->tz_dsttime = 1;
else
z->tz_dsttime = 0;
} else {
z->tz_minuteswest = 0;
z->tz_dsttime = 0;
}
}
if (tp != NULL) {
typedef void(WINAPI * GetSystemTimeAsFileTime_t)(LPFILETIME);
static GetSystemTimeAsFileTime_t GetSystemTimeAsFileTime_p /* = 0 */;
/* Set function pointer during first call */
GetSystemTimeAsFileTime_t get_time =
__atomic_load_n(&GetSystemTimeAsFileTime_p, __ATOMIC_RELAXED);
if (get_time == NULL) {
/* Use GetSystemTimePreciseAsFileTime() if available (Windows 8 or later) */
get_time = (GetSystemTimeAsFileTime_t)(intptr_t)GetProcAddress(
GetModuleHandle("kernel32.dll"),
"GetSystemTimePreciseAsFileTime"); /* <1us precision on Windows 10 */
if (get_time == NULL)
get_time = GetSystemTimeAsFileTime; /* >15ms precision on Windows 10 */
__atomic_store_n(&GetSystemTimeAsFileTime_p, get_time, __ATOMIC_RELAXED);
}
get_time(&_now.ft); /* 100 nano-seconds since 1-1-1601 */
_now.ns100 -= FILETIME_1970; /* 100 nano-seconds since 1-1-1970 */
tp->tv_sec = _now.ns100 / HECTONANOSEC_PER_SEC; /* seconds since 1-1-1970 */
tp->tv_nsec = (long)(_now.ns100 % HECTONANOSEC_PER_SEC) * 100; /* nanoseconds */
}
return res;
}
int PS4_SYSV_ABI gettimeofday(struct timeval* p, struct timezone* z) {
struct timespec tp;
if (getntptimeofday(&tp, z))
return -1;
p->tv_sec = tp.tv_sec;
p->tv_usec = (tp.tv_nsec / 1000);
return 0;
}
void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
clock = std::make_unique<Common::NativeClock>();
initial_ptc = clock->GetUptime();
@ -39,6 +134,12 @@ void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
sceKernelGetProcessTimeCounterFrequency);
LIB_FUNCTION("-2IRUCO--PM", "libkernel", 1, "libkernel", 1, 1, sceKernelReadTsc);
LIB_FUNCTION("1j3S3n-tTW4", "libkernel", 1, "libkernel", 1, 1, sceKernelGetTscFrequency);
LIB_FUNCTION("n88vx3C5nW8", "libScePosix", 1, "libkernel", 1, 1, gettimeofday);
LIB_FUNCTION("n88vx3C5nW8", "libkernel", 1, "libkernel", 1, 1, gettimeofday);
LIB_FUNCTION("1jfXLRVzisc", "libkernel", 1, "libkernel", 1, 1, sceKernelUsleep);
LIB_FUNCTION("QcteRwbsnV0", "libScePosix", 1, "libkernel", 1, 1, posix_usleep);
LIB_FUNCTION("-ZR+hG7aDHw", "libkernel", 1, "libkernel", 1, 1, sceKernelSleep);
LIB_FUNCTION("0wu33hunNdE", "libScePosix", 1, "libkernel", 1, 1, sceKernelSleep);
}
} // namespace Libraries::Kernel

View File

@ -35,7 +35,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
}
// Align free position
free_addr = Common::AlignUp(free_addr, alignment);
free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
ASSERT(free_addr >= search_start && free_addr + size <= search_end);
// Add the allocated region to the list and commit its pages.

View File

@ -16,9 +16,10 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
}
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset) {
throw NotImplementedException("SPIR-V Instruction");
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset) {
// TODO
return EmitImageSampleImplicitLod(ctx, inst, handle, coords, bias_lc, offset);
}
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,

View File

@ -334,8 +334,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset);
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset);
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id bias_lc, const IR::Value& offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,

View File

@ -823,6 +823,7 @@ IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
fmt::print("Tree:\n {}\n\n", DumpTree(root.children));
return syntax_list;
}

View File

@ -18,11 +18,13 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
}
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset =
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::U32 dword_offset =
smrd.imm ? ir.Imm32(smrd.offset) : (smrd.offset == SQ_SRC_LITERAL ? ir.Imm32(inst.src[1].code)
: ir.GetScalarReg(IR::ScalarReg(smrd.offset)));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));

View File

@ -129,7 +129,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
}
break;
case OperandField::VccHi:
if (force_flt) {
value = ir.BitCast<IR::F32>(ir.GetVccHi());
} else {
value = ir.GetVccHi();
}
break;
default:
UNREACHABLE();
@ -297,6 +301,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_MADAK_F32: // Yes these can share the opcode
translator.V_FMA_F32(inst);
break;
case Opcode::IMAGE_SAMPLE_LZ: // TODO?
case Opcode::IMAGE_SAMPLE:
translator.IMAGE_SAMPLE(inst);
break;
@ -351,9 +356,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;
case Opcode::S_CMP_LT_I32:
translator.S_CMP(ConditionOp::LT, true, inst);
break;
case Opcode::S_CMP_LG_I32:
translator.S_CMP(ConditionOp::LG, true, inst);
break;
case Opcode::S_CMP_GT_I32:
translator.S_CMP(ConditionOp::GT, true, inst);
break;
case Opcode::S_CMP_EQ_I32:
translator.S_CMP(ConditionOp::EQ, true, inst);
break;
@ -387,6 +398,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_SIN_F32:
translator.V_SIN_F32(inst);
break;
case Opcode::V_COS_F32:
translator.V_COS_F32(inst);
break;
case Opcode::V_LOG_F32:
translator.V_LOG_F32(inst);
break;
@ -522,6 +536,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_RNDNE_F32:
translator.V_RNDNE_F32(inst);
break;
case Opcode::V_BCNT_U32_B32:
translator.V_BCNT_U32_B32(inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:

View File

@ -104,6 +104,8 @@ public:
void V_ASHRREV_I32(const GcnInst& inst);
void V_MAD_U32_U24(const GcnInst& inst);
void V_RNDNE_F32(const GcnInst& inst);
void V_BCNT_U32_B32(const GcnInst& inst);
void V_COS_F32(const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

View File

@ -372,4 +372,15 @@ void Translator::V_RNDNE_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPRoundEven(src0));
}
void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1));
}
void Translator::V_COS_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPCos(src0));
}
} // namespace Shader::Gcn

View File

@ -31,7 +31,7 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
ASSERT(!mimg.da);
//ASSERT(!mimg.da);
IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg dest_reg{inst.dst[0].code};

View File

@ -129,6 +129,7 @@ void IREmitter::SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value) {
template <>
U32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
ASSERT(reg < IR::ScalarReg::Max);
return Inst<U32>(Opcode::GetScalarRegister, reg);
}

View File

@ -148,6 +148,9 @@ private:
} // Anonymous namespace
SharpLocation TrackSharp(const IR::Inst* inst) {
while (inst->GetOpcode() == IR::Opcode::Phi) {
inst = inst->Arg(0).InstRecursive();
}
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return SharpLocation{
.sgpr_base = u32(IR::ScalarReg::Max),
@ -163,6 +166,12 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
// Retrieve SGPR pair that holds sbase
const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
while (sbase0->GetOpcode() == IR::Opcode::Phi) {
sbase0 = sbase0->Arg(0).TryInstRecursive();
}
while (sbase1->GetOpcode() == IR::Opcode::Phi) {
sbase1 = sbase1->Arg(0).TryInstRecursive();
}
ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
sbase1->GetOpcode() == IR::Opcode::GetUserData,
"Nested resource loads not supported");

View File

@ -53,8 +53,15 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
fmt::print("Pre SSA passes\n\n{}\n", Shader::IR::DumpProgram(program));
std::fflush(stdout);
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
fmt::print("Post SSA passes\n\n{}\n", Shader::IR::DumpProgram(program));
std::fflush(stdout);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks);

View File

@ -333,6 +333,14 @@ struct Liverpool {
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
u64 Address() const {
return u64(z_read_base) << 8;
}
[[nodiscard]] size_t GetSizeAligned() const {
return depth_slice.tile_max * 8;
}
};
enum class ClipSpace : u32 {
@ -564,6 +572,7 @@ struct Liverpool {
Subtract = 1,
Min = 2,
Max = 3,
ReverseSubtract = 4,
};
BitField<0, 5, BlendFactor> color_src_factor;
@ -612,7 +621,7 @@ struct Liverpool {
BitField<0, 2, EndianSwap> endian;
BitField<2, 5, DataFormat> format;
BitField<7, 1, u32> linear_general;
BitField<8, 2, NumberFormat> number_type;
BitField<8, 3, NumberFormat> number_type;
BitField<11, 2, SwapMode> comp_swap;
BitField<13, 1, u32> fast_clear;
BitField<14, 1, u32> compression;
@ -680,7 +689,7 @@ struct Liverpool {
NumberFormat NumFormat() const {
// There is a small difference between T# and CB number types, account for it.
return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb
: info.number_type;
}
};

View File

@ -176,6 +176,8 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
return vk::BlendOp::eMin;
case BlendFunc::Max:
return vk::BlendOp::eMax;
case BlendFunc::ReverseSubtract:
return vk::BlendOp::eReverseSubtract;
default:
UNREACHABLE();
}
@ -316,7 +318,19 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc7SrgbBlock;
}
UNREACHABLE();
if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc1RgbaUnormBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc3UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR8G8B8A8Uint;
}
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16Sfloat;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {

View File

@ -63,8 +63,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pVertexAttributeDescriptions = attributes.data(),
};
ASSERT_MSG(key.prim_type != Liverpool::PrimitiveType::RectList || IsEmbeddedVs(),
"Rectangle List primitive type is only supported for embedded VS");
if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) {
LOG_WARNING(Render_Vulkan, "Rectangle List primitive type is only supported for embedded VS");
}
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = LiverpoolToVK::PrimitiveType(key.prim_type),

View File

@ -42,8 +42,8 @@ Instance::Instance(bool enable_validation, bool dump_command_buffers)
Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index)
: instance{CreateInstance(dl, window.getWindowInfo().type, true, false)},
debug_callback{CreateDebugCallback(*instance)},
physical_devices{instance->enumeratePhysicalDevices()} {
debug_callback = CreateDebugCallback(*instance);
const std::size_t num_physical_devices = static_cast<u16>(physical_devices.size());
ASSERT_MSG(num_physical_devices > 0, "No physical devices found");

View File

@ -112,6 +112,7 @@ void PipelineCache::RefreshGraphicsKey() {
key.color_formats[remapped_cb] =
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
key.blend_controls[remapped_cb] = regs.blend_control[cb];
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass);
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
++remapped_cb;
@ -160,6 +161,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
inst_pool.ReleaseContents();
// Recompile shader to IR.
LOG_INFO(Render_Vulkan, "Compiling shader {:#x}", hash);
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

View File

@ -140,10 +140,7 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
// Sanitize extension list
std::erase_if(extensions, [&](const char* extension) -> bool {

View File

@ -41,6 +41,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
color_attachments{};
vk::RenderingAttachmentInfo depth_attachment{};
u32 num_depth_attachments{};
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
const auto& col_buf = regs.color_buffers[col_buf_id];
if (!col_buf) {
@ -57,6 +59,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
.storeOp = vk::AttachmentStoreOp::eStore,
});
}
if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) {
const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
.loadOp = vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
};
num_depth_attachments++;
}
// TODO: Don't restart renderpass every draw
const auto& scissor = regs.screen_scissor;
@ -69,6 +81,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
.layerCount = 1,
.colorAttachmentCount = static_cast<u32>(color_attachments.size()),
.pColorAttachments = color_attachments.data(),
.pDepthAttachment = num_depth_attachments ? &depth_attachment : nullptr,
};
UpdateDynamicState(*pipeline);
@ -78,7 +91,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
const u32 num_vertices = pipeline->IsEmbeddedVs() ? 4 : regs.num_indices;
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList ? 4 : regs.num_indices;
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering();
@ -162,7 +175,7 @@ void Rasterizer::UpdateViewportScissorState() {
.y = regs.viewports[0].yoffset - regs.viewports[0].yscale,
.width = regs.viewports[0].xscale * 2.0f,
.height = regs.viewports[0].yscale * 2.0f,
.minDepth = regs.viewports[0].zoffset - regs.viewports[0].zscale,
.minDepth = /*regs.viewports[0].zoffset - regs.viewports[0].zscale*/0.f,
.maxDepth = regs.viewports[0].zscale + regs.viewports[0].zoffset,
};
const vk::Rect2D scissor{

View File

@ -9,6 +9,7 @@
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/tile_manager.h"
#include <vulkan/vulkan_format_traits.hpp>
#include <vk_mem_alloc.h>
namespace VideoCore {
@ -37,10 +38,10 @@ static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) {
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eSampled;
if (false /*&& IsDepthStencilFormat(format)*/) {
if (format == vk::Format::eD32SfloatS8Uint || format == vk::Format::eD32Sfloat) {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
if (format != vk::Format::eBc3SrgbBlock) {
if (format != vk::Format::eBc3SrgbBlock && format != vk::Format::eBc3UnormBlock && format != vk::Format::eBc1RgbaUnormBlock) {
usage |= vk::ImageUsageFlagBits::eColorAttachment;
}
}
@ -97,6 +98,18 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
guest_size_bytes = buffer.GetSizeAligned();
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
is_tiled = false;
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = vk::ImageType::e2D;
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();
size.depth = 1;
pitch = size.width;
guest_size_bytes = buffer.GetSizeAligned();
}
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
is_tiled = image.IsTiled();
tiling_mode = image.GetTilingMode();
@ -158,11 +171,21 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
}
}
if (info.pixel_format == vk::Format::eR16Sscaled) {
info.is_tiled = false;
flags = {};
}
info.usage = ImageUsageFlags(info.pixel_format);
if (info.is_tiled || info.is_storage) {
if ((info.is_tiled && (info.pixel_format != vk::Format::eBc3UnormBlock) && info.pixel_format != vk::Format::eBc1RgbaSrgbBlock) || info.is_storage) {
info.usage |= vk::ImageUsageFlagBits::eStorage;
}
if (info.pixel_format == vk::Format::eD32Sfloat) {
aspect_mask = vk::ImageAspectFlagBits::eDepth;
}
if (info.pixel_format == vk::Format::eD32SfloatS8Uint) {
aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
}
const vk::ImageCreateInfo image_ci = {
.flags = flags,
@ -187,7 +210,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
ImageViewInfo view_info;
view_info.format = DemoteImageFormatForDetiling(info.pixel_format);
view_info.used_for_detiling = true;
view_for_detiler.emplace(*instance, view_info, image);
view_for_detiler.emplace(*instance, view_info, *this);
}
Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);

View File

@ -38,6 +38,8 @@ struct ImageInfo {
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
bool is_tiled = false;

View File

@ -4,6 +4,7 @@
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/image.h"
namespace VideoCore {
@ -58,7 +59,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept {
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
}
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, vk::Image image,
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
std::optional<vk::ImageUsageFlags> usage_override /*= {}*/)
: info{info_} {
vk::ImageViewUsageCreateInfo usage_ci{};
@ -68,14 +69,14 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
const vk::ImageViewCreateInfo image_view_ci = {
.pNext = usage_override.has_value() ? &usage_ci : nullptr,
.image = image,
.image = image.image,
.viewType = info.type,
.format = info.format,
.components = info.mapping,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.aspectMask = image.aspect_mask,
.baseMipLevel = 0U,
.levelCount = 1,
.levelCount = 1u,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},

View File

@ -29,8 +29,10 @@ struct ImageViewInfo {
auto operator<=>(const ImageViewInfo&) const = default;
};
struct Image;
struct ImageView {
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, vk::Image image,
explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
std::optional<vk::ImageUsageFlags> usage_override = {});
~ImageView();

View File

@ -93,7 +93,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
ASSERT(null_id.index == 0);
ImageViewInfo view_info;
void(slot_image_views.insert(instance, view_info, slot_images[null_id].image));
void(slot_image_views.insert(instance, view_info, slot_images[null_id]));
}
TextureCache::~TextureCache() {
@ -112,7 +112,7 @@ void TextureCache::OnCpuWrite(VAddr address) {
});
}
Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) {
std::unique_lock lock{m_page_table};
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
@ -132,7 +132,7 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
}
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CpuModified)) {
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
RefreshImage(image);
TrackImage(image, image_id);
}
@ -154,7 +154,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
}
const ImageViewId view_id =
slot_image_views.insert(instance, view_info, image.image, usage_override);
slot_image_views.insert(instance, view_info, image, usage_override);
image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id];
@ -170,7 +170,19 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
auto& image = FindImage(info, buffer.Address());
auto& image = FindImage(info, buffer.Address(), false);
image.flags &= ~ImageFlagBits::CpuModified;
ImageViewInfo view_info;
view_info.format = info.pixel_format;
return RegisterImageView(image, view_info);
}
ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
auto& image = FindImage(info, buffer.Address(), false);
image.flags &= ~ImageFlagBits::CpuModified;
ImageViewInfo view_info;
view_info.format = info.pixel_format;
@ -331,7 +343,7 @@ void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
const u32 interval_size = interval_end_addr - interval_start_addr;
void* addr = reinterpret_cast<void*>(interval_start_addr);
if (delta > 0 && count == delta) {
mprotect(addr, interval_size, PAGE_READONLY);
//mprotect(addr, interval_size, PAGE_READONLY);
} else if (delta < 0 && count == -delta) {
mprotect(addr, interval_size, PAGE_READWRITE);
} else {

View File

@ -37,7 +37,7 @@ public:
void OnCpuWrite(VAddr address);
/// Retrieves the image handle of the image with the provided attributes and address.
[[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address);
[[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create = true);
/// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image);
@ -45,6 +45,8 @@ public:
/// Retrieves the render target with specified properties
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint);
[[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint);
/// Reuploads image contents.
void RefreshImage(Image& image);

View File

@ -304,7 +304,7 @@ bool TileManager::TryDetile(Image& image) {
return false;
}
const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 64);
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
std::memcpy(data, image_data, image.info.guest_size_bytes);
staging.Commit(image.info.guest_size_bytes);