diff --git a/src/common/io_file.cpp b/src/common/io_file.cpp
index fda3353e..7a441738 100644
--- a/src/common/io_file.cpp
+++ b/src/common/io_file.cpp
@@ -184,8 +184,8 @@ void IOFile::Open(const fs::path& path, FileAccessMode mode, FileType type, File
 
     if (!IsOpen()) {
         const auto ec = std::error_code{errno, std::generic_category()};
-        LOG_ERROR(Common_Filesystem, "Failed to open the file at path={}, ec_message={}",
-                  PathToUTF8String(file_path), ec.message());
+        //LOG_ERROR(Common_Filesystem, "Failed to open the file at path={}, ec_message={}",
+        //          PathToUTF8String(file_path), ec.message());
     }
 }
 
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 0fd344b5..0d75b331 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -189,7 +189,6 @@ public:
         } else {
             ForEachBackend([&entry](auto& backend) { backend.Write(entry); });
         }
-        std::fflush(stdout);
     }
 
 private:
diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp
index a2dfcbc1..f74514a2 100644
--- a/src/core/libraries/kernel/file_system.cpp
+++ b/src/core/libraries/kernel/file_system.cpp
@@ -12,7 +12,7 @@
 namespace Libraries::Kernel {
 
 int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
-    LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
+    //LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
     auto* h = Common::Singleton::Instance();
     auto* mnt = Common::Singleton::Instance();
 
@@ -170,7 +170,7 @@ int PS4_SYSV_ABI sceKernelMkdir(const char* path, u16 mode) {
 }
 
 int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
-    LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
+    //LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
     auto* mnt = Common::Singleton::Instance();
     std::string path_name = mnt->GetHostFile(path);
     memset(sb, 0, sizeof(OrbisKernelStat));
@@ -198,7 +198,24 @@ int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
 int PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) {
     int result = sceKernelStat(path, sb);
     if (result < 0) {
-        UNREACHABLE(); // TODO
+        // Report the failure to the guest instead of aborting the emulator.
+        // TODO(review): confirm whether the Orbis code should be translated to a POSIX one here.
+        return result;
+    }
+    return ORBIS_OK;
+}
+
+// Returns ORBIS_OK when the guest path maps to an existing host file or directory.
+int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) {
+    if (path == nullptr) {
+        return SCE_KERNEL_ERROR_EINVAL;
+    }
+    auto* mnt = Common::Singleton::Instance();
+    const std::string path_name = mnt->GetHostFile(path);
+    // Non-throwing overload: an exception must not unwind into guest code.
+    std::error_code ec;
+    if (!std::filesystem::exists(path_name, ec)) {
+        return SCE_KERNEL_ERROR_ENOENT;
     }
     return ORBIS_OK;
 }
@@ -216,6 +233,7 @@ void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("1-LFLmRFxxM", "libkernel", 1, "libkernel", 1, 1, sceKernelMkdir);
     LIB_FUNCTION("eV9wAD2riIA", "libkernel", 1, "libkernel", 1, 1, sceKernelStat);
     LIB_FUNCTION("E6ao34wPw+U", "libScePosix", 1, "libkernel", 1, 1, posix_stat);
+    LIB_FUNCTION("uWyW3v98sU4", "libkernel", 1, "libkernel", 1, 1, sceKernelCheckReachability);
 
     // openOrbis (to check if it is valid out of OpenOrbis
     LIB_FUNCTION("6c3rCVE-fTU", "libkernel", 1, "libkernel", 1, 1,
diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp
index aa51c635..dfa148f5 100644
--- a/src/core/libraries/kernel/thread_management.cpp
+++ b/src/core/libraries/kernel/thread_management.cpp
@@ -3,6 +3,7 @@
 
 #include
 #include
+#include
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/singleton.h"
@@ -392,7 +393,7 @@ int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMut
     int result = pthread_mutex_init(&(*mutex)->pth_mutex, &(*attr)->pth_mutex_attr);
 
     if (name != nullptr) {
-        LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
+        //LOG_INFO(Kernel_Pthread, "name={}, result={}", name, result);
     }
 
     switch (result) {
@@ -417,7 +418,7 @@ int PS4_SYSV_ABI scePthreadMutexDestroy(ScePthreadMutex* mutex) {
 
     int result = pthread_mutex_destroy(&(*mutex)->pth_mutex);
 
-    LOG_INFO(Kernel_Pthread, "name={}, 
result={}", (*mutex)->name, result);
+    //LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
 
     delete *mutex;
     *mutex = nullptr;
@@ -1036,6 +1037,146 @@ void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
     return linker->TlsGetAddr(index->ti_module, index->ti_offset);
 }
 
+
+// Strips the SCE_KERNEL_ERROR_UNKNOWN base from an Orbis kernel error so libScePosix callers
+// get the bare error number; values outside (UNKNOWN, ESTOP] collapse to POSIX_EOTHER.
+static int OrbisToPosixError(int result) {
+    return result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP
+               ? result + -SCE_KERNEL_ERROR_UNKNOWN
+               : POSIX_EOTHER;
+}
+
+int PS4_SYSV_ABI scePthreadCondDestroy(ScePthreadCond* cond) {
+    if (cond == nullptr || *cond == nullptr) {
+        return SCE_KERNEL_ERROR_EINVAL;
+    }
+    int result = pthread_cond_destroy(&(*cond)->cond);
+
+    LOG_INFO(Kernel_Pthread, "scePthreadCondDestroy, result={}", result);
+
+    // NOTE(review): scePthreadMutexDestroy deletes its wrapper object after destroying the
+    // host primitive; confirm whether *cond needs the same treatment to avoid a leak.
+    switch (result) {
+    case 0:
+        return SCE_OK;
+    case EBUSY:
+        return SCE_KERNEL_ERROR_EBUSY;
+    default:
+        return SCE_KERNEL_ERROR_EINVAL;
+    }
+}
+
+int PS4_SYSV_ABI posix_pthread_condattr_init(ScePthreadCondattr* attr) {
+    const int result = scePthreadCondattrInit(attr);
+    LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondattrInit: result = {}", result);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_cond_init(ScePthreadCond* cond, const ScePthreadCondattr* attr) {
+    const int result = scePthreadCondInit(cond, attr, "");
+    LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondInit: result = {}", result);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_condattr_destroy(ScePthreadCondattr* attr) {
+    const int result = scePthreadCondattrDestroy(attr);
+    LOG_INFO(Kernel_Pthread, "redirect to scePthreadCondattrDestroy: result = {}", result);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_attr_init(ScePthreadAttr* attr) {
+    const int result = scePthreadAttrInit(attr);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_attr_setstacksize(ScePthreadAttr* attr, size_t stacksize) {
+    const int result = scePthreadAttrSetstacksize(attr, stacksize);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_attr_setdetachstate(ScePthreadAttr* attr, int detachstate) {
+    const int result = scePthreadAttrSetdetachstate(attr, detachstate);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_mutexattr_init(ScePthreadMutexattr* attr) {
+    const int result = scePthreadMutexattrInit(attr);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_mutexattr_settype(ScePthreadMutexattr* attr, int type) {
+    const int result = scePthreadMutexattrSettype(attr, type);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_mutexattr_destroy(ScePthreadMutexattr* attr) {
+    const int result = scePthreadMutexattrDestroy(attr);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) {
+    const int result = scePthreadMutexattrSetprotocol(attr, protocol);
+    LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", result);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_t* size) {
+    if (attr == nullptr || *attr == nullptr || addr == nullptr || size == nullptr) {
+        return SCE_KERNEL_ERROR_EINVAL;
+    }
+    const int result = pthread_attr_getstack(&(*attr)->pth_attr, addr, size);
+    LOG_INFO(Kernel_Pthread, "scePthreadAttrGetstack: result = {}", result);
+    return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL;
+}
+
+int PS4_SYSV_ABI posix_pthread_attr_destroy(ScePthreadAttr* attr) {
+    const int result = scePthreadAttrDestroy(attr);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_create(ScePthread* thread, const ScePthreadAttr* attr,
+                                      pthreadEntryFunc start_routine, void* arg) {
+    LOG_INFO(Kernel_Pthread, "posix pthread_create redirect to scePthreadCreate");
+    const int result = scePthreadCreate(thread, attr, start_routine, arg, "PS4_Thread");
+    return result != 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_detach(ScePthread thread) {
+    LOG_INFO(Kernel_Pthread, "thread detach name = {}", thread->name);
+    thread->is_detached = true;
+    return ORBIS_OK;
+}
+
+// NOTE(review): these forward the guest's sem_t straight to the host implementation, which
+// presumably only works while guest and host agree on the sem_t layout -- confirm.
+int PS4_SYSV_ABI posix_sem_init(sem_t* sem, int pshared, unsigned int value) {
+    return sem_init(sem, pshared, value);
+}
+
+int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) {
+    return sem_wait(sem);
+}
+
+int PS4_SYSV_ABI posix_sem_post(sem_t* sem) {
+    return sem_post(sem);
+}
+
+int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) {
+    const int result = scePthreadMutexDestroy(mutex);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
+int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** value_ptr) {
+    return pthread_join(thread->pth, value_ptr);
+}
+
+int PS4_SYSV_ABI posix_pthread_mutex_trylock(ScePthreadMutex* mutex) {
+    // NOTE(review): scePthreadMutexTrylock presumably reports a held mutex through its return
+    // code; hand that to the caller rather than aborting the emulator.
+    const int result = scePthreadMutexTrylock(mutex);
+    return result < 0 ? OrbisToPosixError(result) : result;
+}
+
 void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy);
     LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate);
@@ -1048,6 +1189,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
 
     LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSelf);
     LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, pthread_self);
+    LIB_FUNCTION("EotR8a3ASf4", "libScePosix", 1, "libkernel", 1, 1, pthread_self);
     LIB_FUNCTION("3qxgM4ezETA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetaffinity);
     LIB_FUNCTION("8+s5BzZjxSg", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetaffinity);
     LIB_FUNCTION("x1X76arYMxU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGet);
@@ -1091,6 +1233,36 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("7H0iTOciTLo", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
     LIB_FUNCTION("2Z+PpY6CaJg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
     LIB_FUNCTION("mkx2fVhNMsg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
+
+    LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack);
+    LIB_FUNCTION("+U1R4WtXvoc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_detach);
+    LIB_FUNCTION("g+PZd2hiacg", "libkernel", 1, "libkernel", 1, 1, scePthreadCondDestroy);
+    // posix calls
+    LIB_FUNCTION("wtkt-teR1so", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_init);
+    LIB_FUNCTION("2Q0z6rnBrTE", "libScePosix", 1, "libkernel", 1, 1,
+                 posix_pthread_attr_setstacksize);
+    LIB_FUNCTION("E+tyo3lp5Lw", "libScePosix", 1, "libkernel", 1, 1,
+                 posix_pthread_attr_setdetachstate);
+    LIB_FUNCTION("OxhIB8LB-PQ", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_create);
+    LIB_FUNCTION("zHchY8ft5pk", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_attr_destroy);
+    LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
+    LIB_FUNCTION("mKoTx03HRWA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_condattr_init);
+    LIB_FUNCTION("dJcuQVn6-Iw", "libScePosix", 1, "libkernel", 1, 1,
+                 posix_pthread_condattr_destroy);
+
+    LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
+    LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1,
+                 posix_pthread_mutexattr_settype);
+    LIB_FUNCTION("5txKfcMUAok", "libScePosix", 1, "libkernel", 1, 1,
+                 posix_pthread_mutexattr_setprotocol);
+    LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
+    LIB_FUNCTION("K-jXhbt2gn4", "libScePosix", 
1, "libkernel", 1, 1, posix_pthread_mutex_trylock);
+    LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
+    LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init);
+    LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait);
+    LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post);
+    LIB_FUNCTION("HF7lK46xzjY", "libScePosix", 1, "libkernel", 1, 1,
+                 posix_pthread_mutexattr_destroy);
 }
 
 } // namespace Libraries::Kernel
diff --git a/src/core/libraries/kernel/time_management.cpp b/src/core/libraries/kernel/time_management.cpp
index f32e2b8e..b6870a33 100644
--- a/src/core/libraries/kernel/time_management.cpp
+++ b/src/core/libraries/kernel/time_management.cpp
@@ -1,10 +1,16 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <chrono>
+#include <thread>
 #include "common/native_clock.h"
 #include "core/libraries/kernel/time_management.h"
 #include "core/libraries/libs.h"
 
+#ifdef _WIN64
+#include <windows.h>
+#endif
+
 namespace Libraries::Kernel {
 
 static u64 initial_ptc;
@@ -30,6 +36,110 @@ u64 PS4_SYSV_ABI sceKernelReadTsc() {
     return clock->GetUptime();
 }
 
+int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) {
+    std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
+    return 0;
+}
+
+int PS4_SYSV_ABI posix_usleep(u32 microseconds) {
+    return sceKernelUsleep(microseconds);
+}
+
+u32 PS4_SYSV_ABI sceKernelSleep(u32 seconds) {
+    std::this_thread::sleep_for(std::chrono::seconds(seconds));
+    return 0;
+}
+
+#ifdef _WIN64
+// Offset between the FILETIME epoch (1601-01-01) and the Unix epoch (1970-01-01), counted in
+// 100 ns FILETIME ticks.
+constexpr u64 FILETIME_TICKS_TO_UNIX_EPOCH = 116444736000000000ull;
+// Number of 100 ns FILETIME ticks in one second.
+constexpr u64 FILETIME_TICKS_PER_SEC = 10000000ull;
+#endif
+
+struct SceKernelTimeval {
+    time_t tv_sec;
+    s64 tv_usec;
+};
+
+struct timezone {
+    int tz_minuteswest; /* minutes W of Greenwich */
+    int tz_dsttime;     /* type of dst correction */
+};
+
+// gettimeofday stores this straight into guest memory. A plain `long` is only 32 bits wide on
+// Windows hosts, so the fields are pinned to 64 bits like the ones in SceKernelTimeval.
+struct timeval {
+    s64 tv_sec;
+    s64 tv_usec;
+};
+
+int PS4_SYSV_ABI getntptimeofday(struct timespec* tp, struct timezone* z) {
+#ifdef _WIN64
+    if (z != nullptr) {
+        TIME_ZONE_INFORMATION tz_info;
+        const DWORD tz_id = GetTimeZoneInformation(&tz_info);
+        if (tz_id != TIME_ZONE_ID_INVALID) {
+            z->tz_minuteswest = tz_info.Bias;
+            z->tz_dsttime = tz_id == TIME_ZONE_ID_DAYLIGHT ? 1 : 0;
+        } else {
+            z->tz_minuteswest = 0;
+            z->tz_dsttime = 0;
+        }
+    }
+
+    if (tp != nullptr) {
+        using GetSystemTimeAsFileTime_t = void(WINAPI*)(LPFILETIME);
+        // Resolved once: initialization of a function-local static is thread-safe.
+        // Use GetSystemTimePreciseAsFileTime() if available (Windows 8 or later, <1us
+        // precision), otherwise fall back to GetSystemTimeAsFileTime() (>15ms precision).
+        static const GetSystemTimeAsFileTime_t get_time = [] {
+            const auto precise = reinterpret_cast<GetSystemTimeAsFileTime_t>(GetProcAddress(
+                GetModuleHandleA("kernel32.dll"), "GetSystemTimePreciseAsFileTime"));
+            return precise != nullptr ? precise : &GetSystemTimeAsFileTime;
+        }();
+
+        FILETIME ft;
+        get_time(&ft); /* 100 nano-seconds since 1-1-1601 */
+        const u64 since_1601 = (static_cast<u64>(ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
+        const u64 since_1970 = since_1601 - FILETIME_TICKS_TO_UNIX_EPOCH;
+        tp->tv_sec = static_cast<time_t>(since_1970 / FILETIME_TICKS_PER_SEC);
+        tp->tv_nsec = static_cast<long>(since_1970 % FILETIME_TICKS_PER_SEC) * 100;
+    }
+#else
+    if (z != nullptr) {
+        // TODO: query the host time zone; UTC is reported for now.
+        z->tz_minuteswest = 0;
+        z->tz_dsttime = 0;
+    }
+
+    if (tp != nullptr) {
+        const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
+        const auto secs = std::chrono::duration_cast<std::chrono::seconds>(since_epoch);
+        const auto nsecs =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch - secs);
+        tp->tv_sec = static_cast<time_t>(secs.count());
+        tp->tv_nsec = static_cast<long>(nsecs.count());
+    }
+#endif
+    return 0;
+}
+
+int PS4_SYSV_ABI gettimeofday(struct timeval* p, struct timezone* z) {
+    struct timespec tp;
+
+    if (getntptimeofday(&tp, z)) {
+        return -1;
+    }
+    // Only the time zone was requested when no timeval is supplied.
+    if (p != nullptr) {
+        p->tv_sec = tp.tv_sec;
+        p->tv_usec = tp.tv_nsec / 1000;
+    }
+    return 0;
+}
+
 void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
     clock = std::make_unique();
     initial_ptc = clock->GetUptime();
@@ -39,6 +149,12 @@ void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
                  sceKernelGetProcessTimeCounterFrequency);
     LIB_FUNCTION("-2IRUCO--PM", "libkernel", 1, "libkernel", 1, 1, sceKernelReadTsc);
     LIB_FUNCTION("1j3S3n-tTW4", "libkernel", 1, "libkernel", 1, 1, sceKernelGetTscFrequency);
+    LIB_FUNCTION("n88vx3C5nW8", "libScePosix", 1, "libkernel", 1, 1, gettimeofday);
+    LIB_FUNCTION("n88vx3C5nW8", "libkernel", 1, "libkernel", 1, 1, gettimeofday);
+    LIB_FUNCTION("1jfXLRVzisc", "libkernel", 1, "libkernel", 1, 1, sceKernelUsleep);
+    LIB_FUNCTION("QcteRwbsnV0", "libScePosix", 1, "libkernel", 1, 1, posix_usleep);
+    LIB_FUNCTION("-ZR+hG7aDHw", "libkernel", 1, "libkernel", 1, 1, sceKernelSleep);
+    LIB_FUNCTION("0wu33hunNdE", "libScePosix", 1, "libkernel", 1, 1, sceKernelSleep);
 }
 
 } // namespace Libraries::Kernel
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 8ecd311b..acae3b52 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -35,7 +35,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
     }
 
     // Align free position
-    free_addr = Common::AlignUp(free_addr, alignment);
+    free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
     ASSERT(free_addr >= search_start && free_addr + size <= search_end);
 
     // Add the allocated region to the list and commit its pages.
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index ac8f22af..bb3ad2d1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -16,9 +16,10 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c
     return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
 }
 
-Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                              Id lod, const IR::Value& offset) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
+                              Id offset) {
+    // TODO: sample with an explicit LOD; for now this just forwards to the implicit-LOD emitter.
+    return EmitImageSampleImplicitLod(ctx, inst, handle, coords, bias_lc, offset);
 }
 
 Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 728dd2bc..dd3674bb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -334,8 +334,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value);
 
 Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
                               Id offset);
-Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                              Id lod, const IR::Value& offset);
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
+                              Id offset);
 Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
                                   Id coords, Id dref, Id bias_lc, const IR::Value& offset);
 Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
diff --git 
a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 49fe2052..8d9e5da8 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -823,6 +823,7 @@ IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool(ir.GetVccHi()); + } else { + value = ir.GetVccHi(); + } break; default: UNREACHABLE(); @@ -297,6 +301,7 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MADAK_F32: // Yes these can share the opcode translator.V_FMA_F32(inst); break; + case Opcode::IMAGE_SAMPLE_LZ: // TODO? case Opcode::IMAGE_SAMPLE: translator.IMAGE_SAMPLE(inst); break; @@ -351,9 +356,15 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_CMP_LG_U32: translator.S_CMP(ConditionOp::LG, false, inst); break; + case Opcode::S_CMP_LT_I32: + translator.S_CMP(ConditionOp::LT, true, inst); + break; case Opcode::S_CMP_LG_I32: translator.S_CMP(ConditionOp::LG, true, inst); break; + case Opcode::S_CMP_GT_I32: + translator.S_CMP(ConditionOp::GT, true, inst); + break; case Opcode::S_CMP_EQ_I32: translator.S_CMP(ConditionOp::EQ, true, inst); break; @@ -387,6 +398,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_SIN_F32: translator.V_SIN_F32(inst); break; + case Opcode::V_COS_F32: + translator.V_COS_F32(inst); + break; case Opcode::V_LOG_F32: translator.V_LOG_F32(inst); break; @@ -522,6 +536,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_RNDNE_F32: translator.V_RNDNE_F32(inst); break; + case Opcode::V_BCNT_U32_B32: + translator.V_BCNT_U32_B32(inst); + break; case Opcode::S_NOP: case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_SCC0: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 6fd8e3f5..870cb3aa 100644 --- 
a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -104,6 +104,8 @@ public:
     void V_ASHRREV_I32(const GcnInst& inst);
     void V_MAD_U32_U24(const GcnInst& inst);
     void V_RNDNE_F32(const GcnInst& inst);
+    void V_BCNT_U32_B32(const GcnInst& inst);
+    void V_COS_F32(const GcnInst& inst);
 
     // Vector Memory
     void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 7484da57..7222c5fc 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -372,4 +372,15 @@ void Translator::V_RNDNE_F32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.FPRoundEven(src0));
 }
 
+void Translator::V_BCNT_U32_B32(const GcnInst& inst) { // dst = BitCount(src0) + src1
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    SetDst(inst.dst[0], ir.IAdd(ir.BitCount(src0), src1));
+}
+
+void Translator::V_COS_F32(const GcnInst& inst) { // dst = FPCos(src0)
+    const IR::F32 src0{GetSrc(inst.src[0], true)}; // presumably `true` requests a float read
+    SetDst(inst.dst[0], ir.FPCos(src0));
+}
+
 } // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 9694b06c..909217d7 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -31,7 +31,7 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
 
 void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
     const auto& mimg = inst.control.mimg;
-    ASSERT(!mimg.da);
+    //ASSERT(!mimg.da); // NOTE(review): `da` images now pass unchecked -- confirm they are handled
 
     IR::VectorReg addr_reg{inst.src[0].code};
     IR::VectorReg dest_reg{inst.dst[0].code};
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index cf57939d..0d15008a 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ 
b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -129,6 +129,7 @@ void IREmitter::SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value) {
 
 template <>
 U32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
+    ASSERT(reg < IR::ScalarReg::Max);
     return Inst(Opcode::GetScalarRegister, reg);
 }
 
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 68b4fb11..36e816fb 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -148,6 +148,15 @@ private:
 } // Anonymous namespace
 
+// Follows the first operand of each Phi until a non-Phi instruction is reached.
+// NOTE(review): only operand 0 is inspected, which assumes every incoming value of the Phi
+// resolves to the same resource -- confirm. InstRecursive() is used so that a non-instruction
+// operand is caught by its own checks instead of yielding a null pointer that is dereferenced.
+static const IR::Inst* SkipPhis(const IR::Inst* inst) {
+    while (inst->GetOpcode() == IR::Opcode::Phi) {
+        inst = inst->Arg(0).InstRecursive();
+    }
+    return inst;
+}
+
 SharpLocation TrackSharp(const IR::Inst* inst) {
+    inst = SkipPhis(inst);
     if (inst->GetOpcode() == IR::Opcode::GetUserData) {
         return SharpLocation{
             .sgpr_base = u32(IR::ScalarReg::Max),
@@ -163,6 +174,8 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
     // Retrieve SGPR pair that holds sbase
     const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
     const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
+    sbase0 = SkipPhis(sbase0);
+    sbase1 = SkipPhis(sbase1);
     ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
                    sbase1->GetOpcode() == IR::Opcode::GetUserData,
                "Nested resource loads not supported");
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index f2834abf..47198ea6 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -53,8 +53,15 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool color_src_factor;
@@ -612,7 +621,7 @@ struct Liverpool {
             BitField<0, 2, EndianSwap> endian;
             BitField<2, 5, DataFormat> format;
             BitField<7, 1, u32> linear_general;
-            BitField<8, 2, NumberFormat> 
number_type; BitField<11, 2, SwapMode> comp_swap; BitField<13, 1, u32> fast_clear; BitField<14, 1, u32> compression; @@ -680,7 +689,7 @@ struct Liverpool { NumberFormat NumFormat() const { // There is a small difference between T# and CB number types, account for it. - return info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb + return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb : info.number_type; } }; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index eb319f09..d0dc6872 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -176,6 +176,8 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { return vk::BlendOp::eMin; case BlendFunc::Max: return vk::BlendOp::eMax; + case BlendFunc::ReverseSubtract: + return vk::BlendOp::eReverseSubtract; default: UNREACHABLE(); } @@ -316,7 +318,19 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) { return vk::Format::eBc7SrgbBlock; } - UNREACHABLE(); + if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc1RgbaUnormBlock; + } + if (data_format == AmdGpu::DataFormat::FormatBc3 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc3UnormBlock; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR8G8B8A8Uint; + } + if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR16Sfloat; + } + UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) { 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8c78a857..b6764db7 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -63,8 +63,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pVertexAttributeDescriptions = attributes.data(), }; - ASSERT_MSG(key.prim_type != Liverpool::PrimitiveType::RectList || IsEmbeddedVs(), - "Rectangle List primitive type is only supported for embedded VS"); + if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) { + LOG_WARNING(Render_Vulkan, "Rectangle List primitive type is only supported for embedded VS"); + } const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { .topology = LiverpoolToVK::PrimitiveType(key.prim_type), diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index dc10ec6c..d811c42c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -42,8 +42,8 @@ Instance::Instance(bool enable_validation, bool dump_command_buffers) Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index) : instance{CreateInstance(dl, window.getWindowInfo().type, true, false)}, - debug_callback{CreateDebugCallback(*instance)}, physical_devices{instance->enumeratePhysicalDevices()} { + debug_callback = CreateDebugCallback(*instance); const std::size_t num_physical_devices = static_cast(physical_devices.size()); ASSERT_MSG(num_physical_devices > 0, "No physical devices found"); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3a14a02e..95762e66 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -112,6 +112,7 @@ void 
PipelineCache::RefreshGraphicsKey() { key.color_formats[remapped_cb] = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; ++remapped_cb; @@ -160,6 +161,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { inst_pool.ReleaseContents(); // Recompile shader to IR. + LOG_INFO(Render_Vulkan, "Compiling shader {:#x}", hash); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 5cc890f6..831a501f 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -140,10 +140,7 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window if (window_type != Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } - - if (enable_debug_utils) { - extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); // Sanitize extension list std::erase_if(extensions, [&](const char* extension) -> bool { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d49e7138..12cdc0cb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -41,6 +41,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { boost::container::static_vector color_attachments{}; + vk::RenderingAttachmentInfo depth_attachment{}; + u32 num_depth_attachments{}; for (auto col_buf_id = 0u; col_buf_id < 
Liverpool::NumColorBuffers; ++col_buf_id) { const auto& col_buf = regs.color_buffers[col_buf_id]; if (!col_buf) { @@ -57,6 +59,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { .storeOp = vk::AttachmentStoreOp::eStore, }); } + if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) { + const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent); + depth_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eGeneral, + .loadOp = vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + }; + num_depth_attachments++; + } // TODO: Don't restart renderpass every draw const auto& scissor = regs.screen_scissor; @@ -69,6 +81,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { .layerCount = 1, .colorAttachmentCount = static_cast(color_attachments.size()), .pColorAttachments = color_attachments.data(), + .pDepthAttachment = num_depth_attachments ? &depth_attachment : nullptr, }; UpdateDynamicState(*pipeline); @@ -78,7 +91,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { - const u32 num_vertices = pipeline->IsEmbeddedVs() ? 4 : regs.num_indices; + const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList ? 
4 : regs.num_indices; cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0); } cmdbuf.endRendering(); @@ -162,7 +175,7 @@ void Rasterizer::UpdateViewportScissorState() { .y = regs.viewports[0].yoffset - regs.viewports[0].yscale, .width = regs.viewports[0].xscale * 2.0f, .height = regs.viewports[0].yscale * 2.0f, - .minDepth = regs.viewports[0].zoffset - regs.viewports[0].zscale, + .minDepth = /*regs.viewports[0].zoffset - regs.viewports[0].zscale*/0.f, .maxDepth = regs.viewports[0].zscale + regs.viewports[0].zoffset, }; const vk::Rect2D scissor{ diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 7aa3062b..4391076b 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -9,6 +9,7 @@ #include "video_core/texture_cache/image.h" #include "video_core/texture_cache/tile_manager.h" +#include #include namespace VideoCore { @@ -37,10 +38,10 @@ static vk::ImageUsageFlags ImageUsageFlags(const vk::Format format) { vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; - if (false /*&& IsDepthStencilFormat(format)*/) { + if (format == vk::Format::eD32SfloatS8Uint || format == vk::Format::eD32Sfloat) { usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; } else { - if (format != vk::Format::eBc3SrgbBlock) { + if (format != vk::Format::eBc3SrgbBlock && format != vk::Format::eBc3UnormBlock && format != vk::Format::eBc1RgbaUnormBlock) { usage |= vk::ImageUsageFlagBits::eColorAttachment; } } @@ -97,6 +98,18 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, guest_size_bytes = buffer.GetSizeAligned(); } +ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { + is_tiled = false; + pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); + type = vk::ImageType::e2D; + 
size.width = hint.Valid() ? hint.width : buffer.Pitch(); + size.height = hint.Valid() ? hint.height : buffer.Height(); + size.depth = 1; + pitch = size.width; + guest_size_bytes = buffer.GetSizeAligned(); +} + ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { is_tiled = image.IsTiled(); tiling_mode = image.GetTilingMode(); @@ -158,11 +171,21 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible; } } + if (info.pixel_format == vk::Format::eR16Sscaled) { + info.is_tiled = false; + flags = {}; + } info.usage = ImageUsageFlags(info.pixel_format); - if (info.is_tiled || info.is_storage) { + if ((info.is_tiled && (info.pixel_format != vk::Format::eBc3UnormBlock) && info.pixel_format != vk::Format::eBc1RgbaSrgbBlock) || info.is_storage) { info.usage |= vk::ImageUsageFlagBits::eStorage; } + if (info.pixel_format == vk::Format::eD32Sfloat) { + aspect_mask = vk::ImageAspectFlagBits::eDepth; + } + if (info.pixel_format == vk::Format::eD32SfloatS8Uint) { + aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + } const vk::ImageCreateInfo image_ci = { .flags = flags, @@ -187,7 +210,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, ImageViewInfo view_info; view_info.format = DemoteImageFormatForDetiling(info.pixel_format); view_info.used_for_detiling = true; - view_for_detiler.emplace(*instance, view_info, image); + view_for_detiler.emplace(*instance, view_info, *this); } Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone); diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c357f8a2..64bcfbd3 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -38,6 +38,8 @@ struct ImageInfo { explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, 
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; explicit ImageInfo(const AmdGpu::Image& image) noexcept; bool is_tiled = false; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 919415e8..532e4291 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -4,6 +4,7 @@ #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/texture_cache/image_view.h" +#include "video_core/texture_cache/image.h" namespace VideoCore { @@ -58,7 +59,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept { mapping.a = ConvertComponentSwizzle(image.dst_sel_w); } -ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, vk::Image image, +ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, std::optional usage_override /*= {}*/) : info{info_} { vk::ImageViewUsageCreateInfo usage_ci{}; @@ -68,14 +69,14 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info const vk::ImageViewCreateInfo image_view_ci = { .pNext = usage_override.has_value() ? 
&usage_ci : nullptr, - .image = image, + .image = image.image, .viewType = info.type, .format = info.format, .components = info.mapping, .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, + .aspectMask = image.aspect_mask, .baseMipLevel = 0U, - .levelCount = 1, + .levelCount = 1u, .baseArrayLayer = 0, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index aa4ec8ee..e43f55d2 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -29,8 +29,10 @@ struct ImageViewInfo { auto operator<=>(const ImageViewInfo&) const = default; }; +struct Image; + struct ImageView { - explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, vk::Image image, + explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, std::optional usage_override = {}); ~ImageView(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 39f89878..43f13932 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -93,7 +93,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& ASSERT(null_id.index == 0); ImageViewInfo view_info; - void(slot_image_views.insert(instance, view_info, slot_images[null_id].image)); + void(slot_image_views.insert(instance, view_info, slot_images[null_id])); } TextureCache::~TextureCache() { @@ -112,7 +112,7 @@ void TextureCache::OnCpuWrite(VAddr address) { }); } -Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { +Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create) { std::unique_lock lock{m_page_table}; boost::container::small_vector image_ids; ForEachImageInRegion(cpu_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { @@ -132,7 
+132,7 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { } Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified)) { + if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { RefreshImage(image); TrackImage(image, image_id); } @@ -154,7 +154,7 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi } const ImageViewId view_id = - slot_image_views.insert(instance, view_info, image.image, usage_override); + slot_image_views.insert(instance, view_info, image, usage_override); image.image_view_infos.emplace_back(view_info); image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; @@ -170,7 +170,19 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) { ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; - auto& image = FindImage(info, buffer.Address()); + auto& image = FindImage(info, buffer.Address(), false); + image.flags &= ~ImageFlagBits::CpuModified; + + ImageViewInfo view_info; + view_info.format = info.pixel_format; + return RegisterImageView(image, view_info); +} + +ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint) { + const ImageInfo info{buffer, hint}; + auto& image = FindImage(info, buffer.Address(), false); + image.flags &= ~ImageFlagBits::CpuModified; ImageViewInfo view_info; view_info.format = info.pixel_format; @@ -331,7 +343,7 @@ void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { const u32 interval_size = interval_end_addr - interval_start_addr; void* addr = reinterpret_cast(interval_start_addr); if (delta > 0 && count == delta) { - mprotect(addr, interval_size, PAGE_READONLY); + //mprotect(addr, interval_size, PAGE_READONLY); } else if (delta < 0 && count == -delta) { mprotect(addr, 
interval_size, PAGE_READWRITE); } else { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a4dbff73..0a4ed2b7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -37,7 +37,7 @@ public: void OnCpuWrite(VAddr address); /// Retrieves the image handle of the image with the provided attributes and address. - [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address); + [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address, bool refresh_on_create = true); /// Retrieves an image view with the properties of the specified image descriptor. [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image); @@ -45,6 +45,8 @@ public: /// Retrieves the render target with specified properties [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint); + [[nodiscard]] ImageView& DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint); /// Reuploads image contents. void RefreshImage(Image& image); diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 54cbc5da..2ee75452 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -304,7 +304,7 @@ bool TileManager::TryDetile(Image& image) { return false; } - const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4); + const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 64); const u8* image_data = reinterpret_cast(image.cpu_addr); std::memcpy(data, image_data, image.info.guest_size_bytes); staging.Commit(image.info.guest_size_bytes);