Various fixes

This commit is contained in:
raphaelthegreat 2024-06-03 18:52:50 +03:00
parent 511595aca7
commit ea2e4f7b5c
23 changed files with 231 additions and 109 deletions

View File

@ -66,7 +66,7 @@ int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq,
port.sample_size = 4; port.sample_size = 4;
break; break;
default: default:
UNREACHABLE_MSG("Unknown format"); UNREACHABLE_MSG("Unknown format {}", u32(format));
} }
for (int i = 0; i < port.channels_num; i++) { for (int i = 0; i < port.channels_num; i++) {

View File

@ -234,7 +234,7 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
"AudioOutOpen id = {} port_type = {} index = {} lenght= {} sample_rate = {} " "AudioOutOpen id = {} port_type = {} index = {} lenght= {} sample_rate = {} "
"param_type = {}", "param_type = {}",
user_id, GetAudioOutPort(port_type), index, length, sample_rate, user_id, GetAudioOutPort(port_type), index, length, sample_rate,
GetAudioOutParam(param_type)); GetAudioOutParam(param_type & 0xFF));
if ((port_type < 0 || port_type > 4) && (port_type != 127)) { if ((port_type < 0 || port_type > 4) && (port_type != 127)) {
LOG_ERROR(Lib_AudioOut, "Invalid port type"); LOG_ERROR(Lib_AudioOut, "Invalid port type");
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT_TYPE; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT_TYPE;
@ -243,10 +243,6 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
LOG_ERROR(Lib_AudioOut, "Invalid sample rate"); LOG_ERROR(Lib_AudioOut, "Invalid sample rate");
return ORBIS_AUDIO_OUT_ERROR_INVALID_SAMPLE_FREQ; return ORBIS_AUDIO_OUT_ERROR_INVALID_SAMPLE_FREQ;
} }
if (param_type < 0 || param_type > 7) {
LOG_ERROR(Lib_AudioOut, "Invalid format");
return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT;
}
if (length != 256 && length != 512 && length != 768 && length != 1024 && length != 1280 && if (length != 256 && length != 512 && length != 768 && length != 1024 && length != 1280 &&
length != 1536 && length != 1792 && length != 2048) { length != 1536 && length != 1792 && length != 2048) {
LOG_ERROR(Lib_AudioOut, "Invalid length"); LOG_ERROR(Lib_AudioOut, "Invalid length");
@ -255,7 +251,7 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
if (index != 0) { if (index != 0) {
LOG_ERROR(Lib_AudioOut, "index is not valid !=0 {}", index); LOG_ERROR(Lib_AudioOut, "index is not valid !=0 {}", index);
} }
int result = audio->AudioOutOpen(port_type, length, sample_rate, param_type); int result = audio->AudioOutOpen(port_type, length, sample_rate, OrbisAudioOutParam(param_type & 0xFF));
if (result == -1) { if (result == -1) {
LOG_ERROR(Lib_AudioOut, "Audio ports are full"); LOG_ERROR(Lib_AudioOut, "Audio ports are full");
return ORBIS_AUDIO_OUT_ERROR_PORT_FULL; return ORBIS_AUDIO_OUT_ERROR_PORT_FULL;

View File

@ -28,7 +28,7 @@ int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u
LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!"); LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
return SCE_KERNEL_ERROR_EINVAL; return SCE_KERNEL_ERROR_EINVAL;
} }
if ((alignment != 0 || Common::Is16KBAligned(alignment)) && !std::has_single_bit(alignment)) { if (alignment != 0 && !Common::Is16KBAligned(alignment)) {
LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!"); LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!");
return SCE_KERNEL_ERROR_EINVAL; return SCE_KERNEL_ERROR_EINVAL;
} }

View File

@ -10,7 +10,6 @@
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/libraries/kernel/thread_management.h" #include "core/libraries/kernel/thread_management.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/tls.h"
#include "core/linker.h" #include "core/linker.h"
#ifdef _WIN64 #ifdef _WIN64
#include <windows.h> #include <windows.h>
@ -516,7 +515,7 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) {
int result = pthread_mutex_lock(&(*mutex)->pth_mutex); int result = pthread_mutex_lock(&(*mutex)->pth_mutex);
if (result != 0) { if (result != 0) {
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); //LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
} }
switch (result) { switch (result) {
case 0: case 0:
@ -539,7 +538,7 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) {
int result = pthread_mutex_unlock(&(*mutex)->pth_mutex); int result = pthread_mutex_unlock(&(*mutex)->pth_mutex);
if (result != 0) { if (result != 0) {
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result); //LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
} }
switch (result) { switch (result) {
case 0: case 0:
@ -1122,6 +1121,34 @@ void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
return linker->TlsGetAddr(index->ti_module, index->ti_offset); return linker->TlsGetAddr(index->ti_module, index->ti_offset);
} }
/// Guest-facing `sem_init`: forwards directly to the host implementation.
/// @param sem     Semaphore storage supplied by the caller.
/// @param pshared Process-sharing flag, passed through unchanged.
/// @param value   Initial semaphore count.
/// @return Whatever the host `sem_init` returns.
/// NOTE(review): this hands guest memory to the host as a `sem_t`; it presumably
/// relies on the guest's semaphore storage being at least as large as the host
/// `sem_t` -- confirm.
int PS4_SYSV_ABI posix_sem_init(sem_t* sem, int pshared, unsigned int value) {
    const int status = sem_init(sem, pshared, value);
    return status;
}
/// Guest-facing `sem_wait`: forwards directly to the host implementation.
/// @param sem Semaphore to wait on.
/// @return Whatever the host `sem_wait` returns.
int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) {
    const int status = sem_wait(sem);
    return status;
}
/// Guest-facing `sem_post`: forwards directly to the host implementation.
/// @param sem Semaphore to signal.
/// @return Whatever the host `sem_post` returns.
int PS4_SYSV_ABI posix_sem_post(sem_t* sem) {
    const int status = sem_post(sem);
    return status;
}
/// POSIX-flavoured entry point that redirects to scePthreadMutexDestroy and
/// rewrites a negative result before handing it back to the caller.
/// @param mutex Mutex handle forwarded unchanged to scePthreadMutexDestroy.
/// @return The non-negative result of scePthreadMutexDestroy as-is; otherwise the
///         result rebased by SCE_KERNEL_ERROR_UNKNOWN when it lies in the range
///         (SCE_KERNEL_ERROR_UNKNOWN, SCE_KERNEL_ERROR_ESTOP], or POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) {
    const int result = scePthreadMutexDestroy(mutex);
    if (result >= 0) {
        // Nothing to translate.
        return result;
    }

    // Only codes inside the known SCE kernel error window can be rebased;
    // anything else collapses to the generic POSIX_EOTHER.
    const bool is_known_kernel_error =
        result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP;
    if (is_known_kernel_error) {
        return result + -SCE_KERNEL_ERROR_UNKNOWN;
    }
    return POSIX_EOTHER;
}
/// Guest-facing `pthread_join`: joins the host thread stored in `thread->pth`.
/// @param thread    Emulated thread object; only its `pth` member is read here.
/// @param value_ptr Forwarded unchanged to the host `pthread_join`.
/// @return Whatever the host `pthread_join` returns.
int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** value_ptr) {
    const int status = pthread_join(thread->pth, value_ptr);
    return status;
}
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy); LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy);
LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate); LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate);
@ -1173,9 +1200,14 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, pthread_mutexattr_init); LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, pthread_mutexattr_init);
LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1, pthread_mutexattr_settype); LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1, pthread_mutexattr_settype);
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init); LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock); LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock); LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast); LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init);
LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait);
LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post);
LIB_FUNCTION("QBi7HCK03hw", "libkernel", 1, "libkernel", 1, 1, sceKernelClockGettime); LIB_FUNCTION("QBi7HCK03hw", "libkernel", 1, "libkernel", 1, 1, sceKernelClockGettime);
LIB_FUNCTION("lLMT9vJAck0", "libkernel", 1, "libkernel", 1, 1, clock_gettime); LIB_FUNCTION("lLMT9vJAck0", "libkernel", 1, "libkernel", 1, 1, clock_gettime);

View File

@ -8,6 +8,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <pthread.h> #include <pthread.h>
#include <semaphore.h>
#include <sched.h> #include <sched.h>
#include "common/types.h" #include "common/types.h"

View File

@ -57,24 +57,24 @@ void Linker::Execute() {
} }
// Calculate static TLS size. // Calculate static TLS size.
static constexpr size_t StOff = 0x80; // TODO: What is this offset? for (const auto& module : m_modules) {
static_tls_size = std::ranges::fold_left(m_modules, StOff, [&](u32 size, auto& module) { if (module->tls.image_size != 0) {
const size_t new_size = size + module->tls.image_size; module->tls.modid = ++max_tls_index;
module->tls.distance_from_fs = new_size; }
return new_size; static_tls_size += module->tls.image_size;
}); module->tls.offset = static_tls_size;
}
Common::SetCurrentThreadName("GAME_MainThread");
Libraries::Kernel::pthreadInitSelfMainThread();
// Init primary thread TLS.
InitTlsForThread(true);
// Relocate all modules // Relocate all modules
for (u32 i = 1; const auto& m : m_modules) { for (const auto& m : m_modules) {
Relocate(i, m.get()); Relocate(m.get());
} }
// Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread");
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true);
// Start shared library modules // Start shared library modules
for (auto& m : m_modules) { for (auto& m : m_modules) {
if (m->IsSharedLib()) { if (m->IsSharedLib()) {
@ -113,7 +113,7 @@ Module* Linker::LoadModule(const std::filesystem::path& elf_name) {
return m_modules.emplace_back(std::move(module)).get(); return m_modules.emplace_back(std::move(module)).get();
} }
void Linker::Relocate(u32 index, Module* module) { void Linker::Relocate(Module* module) {
module->ForEachRelocation([&](elf_relocation* rel, bool isJmpRel) { module->ForEachRelocation([&](elf_relocation* rel, bool isJmpRel) {
auto type = rel->GetType(); auto type = rel->GetType();
auto symbol = rel->GetSymbol(); auto symbol = rel->GetSymbol();
@ -134,7 +134,7 @@ void Linker::Relocate(u32 index, Module* module) {
rel_is_resolved = true; rel_is_resolved = true;
break; break;
case R_X86_64_DTPMOD64: case R_X86_64_DTPMOD64:
rel_value = static_cast<uint64_t>(index); rel_value = static_cast<u64>(module->tls.modid);
rel_is_resolved = true; rel_is_resolved = true;
rel_sym_type = Loader::SymbolType::Tls; rel_sym_type = Loader::SymbolType::Tls;
break; break;
@ -254,10 +254,11 @@ void Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
} }
/// Resolves the address of a thread-local variable for the calling thread.
/// Invoked by `__tls_get_addr` with the module index and offset of the variable.
/// @param module_index TLS module id; slot `module_index + 1` of the DTV is read
///                     because the first two DTV entries hold counters.
/// @param offset       Byte offset of the variable inside that module's TLS block.
/// @return Pointer to the variable inside the calling thread's TLS block.
void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
    // The DTV is per-thread and reached through the calling thread's TCB.
    DtvEntry* dtv_table = GetTcbBase()->tcb_dtv;
    ASSERT_MSG(dtv_table[0].counter == dtv_generation_counter,
               "Reallocation of DTV table is not supported");

    // Validate the slot itself BEFORE applying the offset: a null block plus a
    // non-zero offset is a non-null pointer and would slip past the assertion.
    u8* const block = static_cast<u8*>(dtv_table[module_index + 1].pointer);
    ASSERT_MSG(block, "DTV allocation is not supported");
    return block + offset;
}
@ -286,26 +287,29 @@ void Linker::InitTlsForThread(bool is_primary) {
} }
// Initialize allocated memory and allocate DTV table. // Initialize allocated memory and allocate DTV table.
const u32 num_dtvs = m_modules.size() - 1; const u32 num_dtvs = max_tls_index;
std::memset(addr_out, 0, total_tls_size); std::memset(addr_out, 0, total_tls_size);
dtv_table.resize(num_dtvs + 2); DtvEntry* dtv_table = new DtvEntry[num_dtvs + 2];
// Initialize thread control block // Initialize thread control block
u8* addr = reinterpret_cast<u8*>(addr_out); u8* addr = reinterpret_cast<u8*>(addr_out);
Tcb* tcb = reinterpret_cast<Tcb*>(addr + static_tls_size); Tcb* tcb = reinterpret_cast<Tcb*>(addr + static_tls_size);
tcb->tcb_self = tcb; tcb->tcb_self = tcb;
tcb->tcb_dtv = dtv_table.data(); tcb->tcb_dtv = dtv_table;
// Dtv[0] is the generation counter. libkernel puts their number into dtv[1] (why?) // Dtv[0] is the generation counter. libkernel puts their number into dtv[1] (why?)
dtv_table[0].counter = dtv_generation_counter; dtv_table[0].counter = dtv_generation_counter;
dtv_table[1].counter = num_dtvs; dtv_table[1].counter = num_dtvs;
// Copy init images to TLS thread blocks and map them to DTV slots. // Copy init images to TLS thread blocks and map them to DTV slots.
for (u32 i = 2; const auto& module : m_modules) { for (const auto& module : m_modules) {
u8* dest = reinterpret_cast<u8*>(addr + static_tls_size - module->tls.distance_from_fs); if (module->tls.image_size == 0) {
continue;
}
u8* dest = reinterpret_cast<u8*>(addr + static_tls_size - module->tls.offset);
const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr); const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
std::memcpy(dest, src, module->tls.init_image_size); std::memcpy(dest, src, module->tls.init_image_size);
tcb->tcb_dtv[i++].pointer = dest; tcb->tcb_dtv[module->tls.modid + 1].pointer = dest;
} }
// Set pointer to FS base // Set pointer to FS base

View File

@ -17,19 +17,6 @@ struct EntryParams {
const char* argv[3]; const char* argv[3];
}; };
union DtvEntry {
struct {
size_t counter;
};
void* pointer;
};
struct Tcb {
Tcb* tcb_self;
DtvEntry* tcb_dtv;
void* tcb_thread;
};
using HeapApiFunc = PS4_SYSV_ABI void*(*)(size_t); using HeapApiFunc = PS4_SYSV_ABI void*(*)(size_t);
class Linker { class Linker {
@ -54,7 +41,7 @@ public:
Module* LoadModule(const std::filesystem::path& elf_name); Module* LoadModule(const std::filesystem::path& elf_name);
void Relocate(u32 index, Module* module); void Relocate(Module* module);
void Resolve(const std::string& name, Loader::SymbolType type, void Resolve(const std::string& name, Loader::SymbolType type,
Module* module, Loader::SymbolRecord* return_info); Module* module, Loader::SymbolRecord* return_info);
void Execute(); void Execute();
@ -64,9 +51,9 @@ private:
const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l); const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l);
void InitTls(); void InitTls();
std::vector<DtvEntry> dtv_table;
u32 dtv_generation_counter{1}; u32 dtv_generation_counter{1};
size_t static_tls_size{}; size_t static_tls_size{};
size_t max_tls_index{};
HeapApiFunc heap_api_func{}; HeapApiFunc heap_api_func{};
std::vector<std::unique_ptr<Module>> m_modules; std::vector<std::unique_ptr<Module>> m_modules;
Loader::SymbolsResolver m_hle_symbols{}; Loader::SymbolsResolver m_hle_symbols{};

View File

@ -35,7 +35,9 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
} }
// Align free position // Align free position
free_addr = Common::AlignUp(free_addr, alignment); if (alignment > 0) {
free_addr = Common::AlignUp(free_addr, alignment);
}
ASSERT(free_addr >= search_start && free_addr + size <= search_end); ASSERT(free_addr >= search_start && free_addr + size <= search_end);
// Add the allocated region to the list and commit its pages. // Add the allocated region to the list and commit its pages.

View File

@ -47,10 +47,11 @@ struct LibraryInfo {
struct ThreadLocalImage { struct ThreadLocalImage {
u64 align; u64 align;
u64 image_size;
u64 offset;
u32 modid;
VAddr image_virtual_addr; VAddr image_virtual_addr;
u64 init_image_size; u64 init_image_size;
u64 image_size;
u64 distance_from_fs;
}; };
struct DynamicModuleInfo { struct DynamicModuleInfo {
@ -166,7 +167,7 @@ public:
std::vector<u8> m_dynamic_data; std::vector<u8> m_dynamic_data;
Loader::SymbolsResolver export_sym; Loader::SymbolsResolver export_sym;
Loader::SymbolsResolver import_sym; Loader::SymbolsResolver import_sym;
ThreadLocalImage tls; ThreadLocalImage tls{};
}; };
} // namespace Core } // namespace Core

View File

@ -49,6 +49,10 @@ void SetTcbBase(void* image_address) {
ASSERT(result != 0); ASSERT(result != 0);
} }
/// Returns the value held in TLS slot `slot` for the calling thread, viewed as a Tcb.
/// NOTE(review): presumably this is the pointer previously stored by SetTcbBase on
/// the same thread -- confirm against the part of SetTcbBase not shown here.
Tcb* GetTcbBase() {
    auto* const stored = TlsGetValue(slot);
    return reinterpret_cast<Tcb*>(stored);
}
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) { void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
using namespace Xbyak::util; using namespace Xbyak::util;

View File

@ -11,9 +11,23 @@ class CodeGenerator;
namespace Core { namespace Core {
/// One slot of a thread's DTV table. A slot is read either as a counter or as a
/// pointer, depending on its position in the table.
union DtvEntry {
// Counter view of the slot.
size_t counter;
// Pointer view of the slot.
void* pointer;
};
/// Thread control block layout.
/// NOTE(review): field order presumably mirrors what guest code expects -- do not
/// reorder without confirming.
struct Tcb {
// Self pointer to this Tcb.
Tcb* tcb_self;
// Pointer to this thread's DTV table.
DtvEntry* tcb_dtv;
// NOTE(review): presumably the owning thread object; not written in the code visible here.
void* tcb_thread;
};
/// Sets the data pointer to the TCB block. /// Sets the data pointer to the TCB block.
void SetTcbBase(void* image_address); void SetTcbBase(void* image_address);
/// Retrieves Tcb structure for the calling thread.
Tcb* GetTcbBase();
/// Patches any instructions that access guest TLS to use provided storage. /// Patches any instructions that access guest TLS to use provided storage.
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c); void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);

View File

@ -87,6 +87,7 @@ int main(int argc, char* argv[]) {
linker->LoadModule(entry.path().string().c_str()); linker->LoadModule(entry.path().string().c_str());
} }
} }
// Check if there is a libc.prx in sce_module folder // Check if there is a libc.prx in sce_module folder
bool found = false; bool found = false;
if (Config::isLleLibc()) { if (Config::isLleLibc()) {
@ -94,7 +95,8 @@ int main(int argc, char* argv[]) {
if (std::filesystem::is_directory(sce_module_folder)) { if (std::filesystem::is_directory(sce_module_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) { for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) {
if (entry.path().filename() == "libc.prx" || if (entry.path().filename() == "libc.prx" ||
entry.path().filename() == "libSceFios2.prx") { entry.path().filename() == "libSceFios2.prx" ||
entry.path().filename() == "libSceNpToolkit2.prx") {
found = true; found = true;
LOG_INFO(Loader, "Loading {}", entry.path().string().c_str()); LOG_INFO(Loader, "Loading {}", entry.path().string().c_str());
linker->LoadModule(entry.path().string().c_str()); linker->LoadModule(entry.path().string().c_str());
@ -105,6 +107,7 @@ int main(int argc, char* argv[]) {
if (!found) { if (!found) {
Libraries::LibC::libcSymbolsRegister(&linker->GetHLESymbols()); Libraries::LibC::libcSymbolsRegister(&linker->GetHLESymbols());
} }
std::thread mainthread([linker]() { linker->Execute(); }); std::thread mainthread([linker]() { linker->Execute(); });
Discord::RPC discordRPC; Discord::RPC discordRPC;
discordRPC.init(); discordRPC.init();

View File

@ -55,8 +55,20 @@ void Translator::S_ANDN2_B64(const GcnInst& inst) {
const IR::U1 src0{get_src(inst.src[0])}; const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 src1{get_src(inst.src[1])}; const IR::U1 src1{get_src(inst.src[1])};
const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))}; const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))};
SetDst(inst.dst[0], result);
ir.SetScc(result); ir.SetScc(result);
switch (inst.dst[0].field) {
case OperandField::VccLo:
ir.SetVcc(result);
break;
case OperandField::ExecLo:
ir.SetExec(result);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
default:
UNREACHABLE();
}
} }
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
@ -124,9 +136,17 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
if (negate) { if (negate) {
result = ir.LogicalNot(result); result = ir.LogicalNot(result);
} }
ASSERT(inst.dst[0].field == OperandField::VccLo);
ir.SetVcc(result);
ir.SetScc(result); ir.SetScc(result);
switch (inst.dst[0].field) {
case OperandField::VccLo:
ir.SetVcc(result);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
default:
UNREACHABLE();
}
} }
void Translator::S_AND_B64(const GcnInst& inst) { void Translator::S_AND_B64(const GcnInst& inst) {
@ -145,9 +165,17 @@ void Translator::S_AND_B64(const GcnInst& inst) {
const IR::U1 src0{get_src(inst.src[0])}; const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 src1{get_src(inst.src[1])}; const IR::U1 src1{get_src(inst.src[1])};
const IR::U1 result = ir.LogicalAnd(src0, src1); const IR::U1 result = ir.LogicalAnd(src0, src1);
ASSERT(inst.dst[0].field == OperandField::VccLo);
ir.SetVcc(result);
ir.SetScc(result); ir.SetScc(result);
switch (inst.dst[0].field) {
case OperandField::VccLo:
ir.SetVcc(result);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
default:
UNREACHABLE();
}
} }
void Translator::S_ADD_I32(const GcnInst& inst) { void Translator::S_ADD_I32(const GcnInst& inst) {
@ -179,6 +207,36 @@ void Translator::S_CSELECT_B32(const GcnInst& inst) {
SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)}); SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)});
} }
void Translator::S_CSELECT_B64(const GcnInst& inst) {
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
return ir.GetVcc();
case OperandField::ExecLo:
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
case OperandField::ConstZero:
return ir.Imm1(false);
default:
UNREACHABLE();
}
};
const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 src1{get_src(inst.src[1])};
const IR::U1 result{ir.Select(ir.GetScc(), src0, src1)};
switch (inst.dst[0].field) {
case OperandField::VccLo:
ir.SetVcc(result);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
default:
UNREACHABLE();
}
}
void Translator::S_BFE_U32(const GcnInst& inst) { void Translator::S_BFE_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src1{GetSrc(inst.src[1])};

View File

@ -5,30 +5,15 @@
namespace Shader::Gcn { namespace Shader::Gcn {
void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg,
const IR::U32U64& address) {
for (u32 i = 0; i < num_dwords; i++) {
if (handle.IsEmpty()) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(address, ir.Imm32(i)));
} else {
const IR::U32 index = ir.IAdd(address, ir.Imm32(i));
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(handle, index));
}
}
}
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd; const auto& smrd = inst.control.smrd;
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
const IR::ScalarReg sbase{inst.src[0].code * 2}; const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 offset = const IR::Value base = ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
smrd.imm ? ir.Imm32(smrd.offset * 4) IR::ScalarReg dst_reg{inst.dst[0].code};
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), for (u32 i = 0; i < num_dwords; i++) {
ir.Imm32(2))}; ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
const IR::U64 base = }
ir.PackUint2x32(ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1)));
const IR::U64 address = ir.IAdd(base, offset);
const IR::ScalarReg dst_reg{inst.dst[0].code};
Load(ir, num_dwords, {}, dst_reg, address);
} }
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
@ -37,8 +22,11 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const IR::U32 dword_offset = const IR::U32 dword_offset =
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset)); smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
const IR::Value vsharp = ir.GetScalarReg(sbase); const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::ScalarReg dst_reg{inst.dst[0].code}; IR::ScalarReg dst_reg{inst.dst[0].code};
Load(ir, num_dwords, vsharp, dst_reg, dword_offset); for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(vsharp, index));
}
} }
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -256,6 +256,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
break; break;
case Opcode::S_WAITCNT: case Opcode::S_WAITCNT:
break; break;
case Opcode::S_LOAD_DWORDX4:
translator.S_LOAD_DWORD(4, inst);
break;
case Opcode::S_LOAD_DWORDX8:
translator.S_LOAD_DWORD(8, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORD: case Opcode::S_BUFFER_LOAD_DWORD:
translator.S_BUFFER_LOAD_DWORD(1, inst); translator.S_BUFFER_LOAD_DWORD(1, inst);
break; break;
@ -356,9 +362,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_CMP_LG_U32: case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst); translator.S_CMP(ConditionOp::LG, false, inst);
break; break;
case Opcode::S_CMP_LG_I32:
translator.S_CMP(ConditionOp::LG, true, inst);
break;
case Opcode::S_CMP_EQ_I32: case Opcode::S_CMP_EQ_I32:
translator.S_CMP(ConditionOp::EQ, true, inst); translator.S_CMP(ConditionOp::EQ, true, inst);
break; break;
case Opcode::S_CMP_EQ_U32:
translator.S_CMP(ConditionOp::EQ, false, inst);
break;
case Opcode::V_CNDMASK_B32: case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst); translator.V_CNDMASK_B32(inst);
break; break;
@ -509,6 +521,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_CSELECT_B32: case Opcode::S_CSELECT_B32:
translator.S_CSELECT_B32(inst); translator.S_CSELECT_B32(inst);
break; break;
case Opcode::S_CSELECT_B64:
translator.S_CSELECT_B64(inst);
break;
case Opcode::S_BFE_U32: case Opcode::S_BFE_U32:
translator.S_BFE_U32(inst); translator.S_BFE_U32(inst);
break; break;
@ -516,6 +531,8 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0: case Opcode::S_CBRANCH_SCC0:
case Opcode::S_CBRANCH_SCC1: case Opcode::S_CBRANCH_SCC1:
case Opcode::S_CBRANCH_VCCNZ:
case Opcode::S_CBRANCH_VCCZ:
case Opcode::S_BRANCH: case Opcode::S_BRANCH:
case Opcode::S_WQM_B64: case Opcode::S_WQM_B64:
case Opcode::V_INTERP_P1_F32: case Opcode::V_INTERP_P1_F32:
@ -523,7 +540,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
break; break;
default: default:
const u32 opcode = u32(inst.opcode); const u32 opcode = u32(inst.opcode);
UNREACHABLE_MSG("Unknown opcode {}", opcode); throw NotImplementedException("Opcode {}", opcode);
} }
} }
} }

View File

@ -46,6 +46,7 @@ public:
void S_AND_B32(const GcnInst& inst); void S_AND_B32(const GcnInst& inst);
void S_LSHR_B32(const GcnInst& inst); void S_LSHR_B32(const GcnInst& inst);
void S_CSELECT_B32(const GcnInst& inst); void S_CSELECT_B32(const GcnInst& inst);
void S_CSELECT_B64(const GcnInst& inst);
void S_BFE_U32(const GcnInst& inst); void S_BFE_U32(const GcnInst& inst);
// Scalar Memory // Scalar Memory

View File

@ -85,21 +85,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) {
} }
void Translator::V_MAD_F32(const GcnInst& inst) { void Translator::V_MAD_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0])}; const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1])}; const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2])}; const IR::F32 src2{GetSrc(inst.src[2], true)};
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
} }
void Translator::V_FRACT_F32(const GcnInst& inst) { void Translator::V_FRACT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0])}; const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.Fract(src0)); ir.SetVectorReg(dst_reg, ir.Fract(src0));
} }
void Translator::V_ADD_F32(const GcnInst& inst) { void Translator::V_ADD_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0])}; const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1])}; const IR::F32 src1{GetSrc(inst.src[1], true)};
SetDst(inst.dst[0], ir.FPAdd(src0, src1)); SetDst(inst.dst[0], ir.FPAdd(src0, src1));
} }
@ -114,14 +114,14 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
void Translator::V_MED3_F32(const GcnInst& inst) { void Translator::V_MED3_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)}; const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1])}; const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2])}; const IR::F32 src2{GetSrc(inst.src[2], true)};
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2); const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
} }
void Translator::V_FLOOR_F32(const GcnInst& inst) { void Translator::V_FLOOR_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0])}; const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.FPFloor(src0)); ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
} }
@ -167,7 +167,17 @@ void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
UNREACHABLE(); UNREACHABLE();
} }
}(); }();
ir.SetVcc(result);
switch (inst.dst[1].field) {
case OperandField::VccLo:
ir.SetVcc(result);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
break;
default:
UNREACHABLE();
}
} }
void Translator::V_MAX_F32(const GcnInst& inst) { void Translator::V_MAX_F32(const GcnInst& inst) {

View File

@ -273,8 +273,8 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
}*/ }*/
} }
U32 IREmitter::ReadConst(const U64& address, const U32& offset) { U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
return Inst<U32>(Opcode::ReadConst, address, offset); return Inst<U32>(Opcode::ReadConst, base, offset);
} }
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) { F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {

View File

@ -77,7 +77,7 @@ public:
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset); [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32 ReadConst(const U64& address, const U32& offset); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,

View File

@ -15,7 +15,7 @@ OPCODE(Epilogue, Void,
OPCODE(Discard, Void, ) OPCODE(Discard, Void, )
// Constant memory operations // Constant memory operations
OPCODE(ReadConst, U32, U64, U32, ) OPCODE(ReadConst, U32, U32x2, U32, )
OPCODE(ReadConstBuffer, F32, Opaque, U32, ) OPCODE(ReadConstBuffer, F32, Opaque, U32, )
OPCODE(ReadConstBufferU32, U32, Opaque, U32, ) OPCODE(ReadConstBufferU32, U32, Opaque, U32, )

View File

@ -157,16 +157,15 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory"); ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
// Retrieve offset from base. // Retrieve offset from base.
IR::Inst* addr = inst->Arg(0).InstRecursive(); const u32 dword_offset = inst->Arg(1).U32();
u32 dword_offset = addr->Arg(1).U32(); const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive();
addr = addr->Arg(0).InstRecursive();
ASSERT_MSG(addr->Arg(1).IsImmediate(), "Bindless not supported");
dword_offset += addr->Arg(1).U32() >> 2;
// Retrieve SGPR that holds sbase // Retrieve SGPR pair that holds sbase
inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive(); const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported"); const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
const IR::ScalarReg base = inst->Arg(0).ScalarReg(); ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
sbase1->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported");
const IR::ScalarReg base = sbase0->Arg(0).ScalarReg();
// Return retrieved location. // Return retrieved location.
return SharpLocation{ return SharpLocation{

View File

@ -160,8 +160,13 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
inst_pool.ReleaseContents(); inst_pool.ReleaseContents();
// Recompile shader to IR. // Recompile shader to IR.
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); try {
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
} catch (const Shader::Exception& e) {
LOG_ERROR(Render_Vulkan, "{}", e.what());
std::abort();
}
// Compile IR to SPIR-V // Compile IR to SPIR-V
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding); auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);

View File

@ -58,7 +58,7 @@ LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
} }
#endif #endif
static constexpr u64 StreamBufferSize = 128_MB; static constexpr u64 StreamBufferSize = 512_MB;
static constexpr u64 PageShift = 12; static constexpr u64 PageShift = 12;
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_) TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)