Merge pull request #206 from shadps4-emu/shader-again
Add more shader instructions and some memory functions
This commit is contained in:
commit
a9cbd8287c
|
@ -73,7 +73,7 @@ int PS4_SYSV_ABI sceKernelCloseEventFlag() {
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) {
|
int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) {
|
||||||
LOG_ERROR(Kernel_Event, "called");
|
LOG_INFO(Kernel_Event, "called");
|
||||||
ef->Clear(bitPattern);
|
ef->Clear(bitPattern);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
@ -177,10 +177,10 @@ int PS4_SYSV_ABI sceKernelWaitEventFlag(OrbisKernelEventFlag ef, u64 bitPattern,
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout);
|
u32 result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout);
|
||||||
|
|
||||||
if (result != ORBIS_OK) {
|
if (result != ORBIS_OK) {
|
||||||
LOG_ERROR(Kernel_Event, "returned {}", result);
|
LOG_ERROR(Kernel_Event, "returned {:#x}", result);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -177,7 +177,7 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg
|
||||||
|
|
||||||
// Load PRX module and relocate any modules that import it.
|
// Load PRX module and relocate any modules that import it.
|
||||||
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
||||||
u32 handle = linker->LoadModule(path);
|
u32 handle = linker->LoadModule(path, true);
|
||||||
if (handle == -1) {
|
if (handle == -1) {
|
||||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||||
}
|
}
|
||||||
|
@ -298,6 +298,8 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
||||||
sceKernelAllocateMainDirectMemory);
|
sceKernelAllocateMainDirectMemory);
|
||||||
LIB_FUNCTION("C0f7TJcbfac", "libkernel", 1, "libkernel", 1, 1,
|
LIB_FUNCTION("C0f7TJcbfac", "libkernel", 1, "libkernel", 1, 1,
|
||||||
sceKernelAvailableDirectMemorySize);
|
sceKernelAvailableDirectMemorySize);
|
||||||
|
LIB_FUNCTION("hwVSPCmp5tM", "libkernel", 1, "libkernel", 1, 1,
|
||||||
|
sceKernelCheckedReleaseDirectMemory);
|
||||||
LIB_FUNCTION("rVjRvHJ0X6c", "libkernel", 1, "libkernel", 1, 1, sceKernelVirtualQuery);
|
LIB_FUNCTION("rVjRvHJ0X6c", "libkernel", 1, "libkernel", 1, 1, sceKernelVirtualQuery);
|
||||||
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
|
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
|
||||||
LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory);
|
LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory);
|
||||||
|
@ -307,6 +309,8 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
||||||
LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
|
LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
|
||||||
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
|
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
|
||||||
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
|
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
|
||||||
|
LIB_FUNCTION("aNz11fnnzi4", "libkernel", 1, "libkernel", 1, 1,
|
||||||
|
sceKernelAvailableFlexibleMemorySize);
|
||||||
LIB_FUNCTION("IWIBBdTHit4", "libkernel", 1, "libkernel", 1, 1, sceKernelMapFlexibleMemory);
|
LIB_FUNCTION("IWIBBdTHit4", "libkernel", 1, "libkernel", 1, 1, sceKernelMapFlexibleMemory);
|
||||||
LIB_FUNCTION("p5EcQeEeJAE", "libkernel", 1, "libkernel", 1, 1,
|
LIB_FUNCTION("p5EcQeEeJAE", "libkernel", 1, "libkernel", 1, 1,
|
||||||
_sceKernelRtldSetApplicationHeapAPI);
|
_sceKernelRtldSetApplicationHeapAPI);
|
||||||
|
|
|
@ -173,6 +173,13 @@ int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInf
|
||||||
return memory->DirectMemoryQuery(offset, flags == 1, query_info);
|
return memory->DirectMemoryQuery(offset, flags == 1, query_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports how much flexible memory is still available.
///
/// @param out_size Receives the value returned by MemoryManager::GetAvailableFlexibleSize().
///                 It is written unconditionally, so the caller must pass a valid pointer.
/// @return Always ORBIS_OK.
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) {
    const size_t available = Core::Memory::Instance()->GetAvailableFlexibleSize();
    *out_size = available;
    LOG_INFO(Kernel_Vmm, "called size = {:#x}", available);
    return ORBIS_OK;
}
|
||||||
|
|
||||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
|
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
|
||||||
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
||||||
linker->SetHeapApiFunc(func);
|
linker->SetHeapApiFunc(func);
|
||||||
|
|
|
@ -78,6 +78,7 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void**
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
|
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
|
||||||
size_t infoSize);
|
size_t infoSize);
|
||||||
|
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut);
|
||||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
|
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
|
||||||
|
|
||||||
} // namespace Libraries::Kernel
|
} // namespace Libraries::Kernel
|
||||||
|
|
|
@ -1276,6 +1276,10 @@ int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) {
|
||||||
return pthread_once(reinterpret_cast<pthread_once_t*>(once_control), init_routine);
|
return pthread_once(reinterpret_cast<pthread_once_t*>(once_control), init_routine);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// HLE entry point that ends the calling thread by forwarding to the host pthread_exit().
///
/// @param value_ptr Exit value handed through to pthread_exit() unchanged.
/// Never returns to the caller.
[[noreturn]] void PS4_SYSV_ABI scePthreadExit(void* value_ptr) {
    pthread_exit(value_ptr);
}
|
||||||
|
|
||||||
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
||||||
LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate);
|
LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate);
|
||||||
LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
|
LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
|
||||||
|
@ -1293,6 +1297,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
||||||
LIB_FUNCTION("onNY9Byn-W8", "libkernel", 1, "libkernel", 1, 1, scePthreadJoin);
|
LIB_FUNCTION("onNY9Byn-W8", "libkernel", 1, "libkernel", 1, 1, scePthreadJoin);
|
||||||
LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach);
|
LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach);
|
||||||
LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual);
|
LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual);
|
||||||
|
LIB_FUNCTION("3kg7rT0NQIs", "libkernel", 1, "libkernel", 1, 1, scePthreadExit);
|
||||||
LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal);
|
LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal);
|
||||||
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
|
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
|
||||||
|
|
||||||
|
|
|
@ -56,10 +56,20 @@ int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) {
|
||||||
} else {
|
} else {
|
||||||
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
|
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
usleep(microseconds);
|
|
||||||
#endif
|
|
||||||
return 0;
|
return 0;
|
||||||
|
#else
|
||||||
|
timespec start;
|
||||||
|
timespec remain;
|
||||||
|
start.tv_sec = microseconds / 1000000;
|
||||||
|
start.tv_nsec = (microseconds % 1000000) * 1000;
|
||||||
|
timespec* requested = &start;
|
||||||
|
int ret = 0;
|
||||||
|
do {
|
||||||
|
ret = nanosleep(requested, &remain);
|
||||||
|
requested = &remain;
|
||||||
|
} while (ret != 0);
|
||||||
|
return ret;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI posix_usleep(u32 microseconds) {
|
int PS4_SYSV_ABI posix_usleep(u32 microseconds) {
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "core/libraries/kernel/memory_management.h"
|
#include "core/libraries/kernel/memory_management.h"
|
||||||
#include "core/libraries/kernel/thread_management.h"
|
#include "core/libraries/kernel/thread_management.h"
|
||||||
#include "core/linker.h"
|
#include "core/linker.h"
|
||||||
|
#include "core/memory.h"
|
||||||
#include "core/tls.h"
|
#include "core/tls.h"
|
||||||
#include "core/virtual_memory.h"
|
#include "core/virtual_memory.h"
|
||||||
|
|
||||||
|
@ -46,7 +47,7 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
|
||||||
: "rax", "rsi", "rdi");
|
: "rax", "rsi", "rdi");
|
||||||
}
|
}
|
||||||
|
|
||||||
Linker::Linker() = default;
|
Linker::Linker() : memory{Memory::Instance()} {}
|
||||||
|
|
||||||
Linker::~Linker() = default;
|
Linker::~Linker() = default;
|
||||||
|
|
||||||
|
@ -66,6 +67,11 @@ void Linker::Execute() {
|
||||||
Relocate(m.get());
|
Relocate(m.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Configure used flexible memory size.
|
||||||
|
if (u64* flexible_size = GetProcParam()->mem_param->flexible_memory_size) {
|
||||||
|
memory->SetTotalFlexibleSize(*flexible_size);
|
||||||
|
}
|
||||||
|
|
||||||
// Init primary thread.
|
// Init primary thread.
|
||||||
Common::SetCurrentThreadName("GAME_MainThread");
|
Common::SetCurrentThreadName("GAME_MainThread");
|
||||||
Libraries::Kernel::pthreadInitSelfMainThread();
|
Libraries::Kernel::pthreadInitSelfMainThread();
|
||||||
|
@ -90,7 +96,7 @@ void Linker::Execute() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 Linker::LoadModule(const std::filesystem::path& elf_name) {
|
s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {
|
||||||
std::scoped_lock lk{mutex};
|
std::scoped_lock lk{mutex};
|
||||||
|
|
||||||
if (!std::filesystem::exists(elf_name)) {
|
if (!std::filesystem::exists(elf_name)) {
|
||||||
|
@ -98,12 +104,13 @@ s32 Linker::LoadModule(const std::filesystem::path& elf_name) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto module = std::make_unique<Module>(elf_name, max_tls_index);
|
auto module = std::make_unique<Module>(memory, elf_name, max_tls_index);
|
||||||
if (!module->IsValid()) {
|
if (!module->IsValid()) {
|
||||||
LOG_ERROR(Core_Linker, "Provided file {} is not valid ELF file", elf_name.string());
|
LOG_ERROR(Core_Linker, "Provided file {} is not valid ELF file", elf_name.string());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
num_static_modules += !is_dynamic;
|
||||||
m_modules.emplace_back(std::move(module));
|
m_modules.emplace_back(std::move(module));
|
||||||
return m_modules.size() - 1;
|
return m_modules.size() - 1;
|
||||||
}
|
}
|
||||||
|
@ -143,11 +150,13 @@ void Linker::Relocate(Module* module) {
|
||||||
case R_X86_64_RELATIVE:
|
case R_X86_64_RELATIVE:
|
||||||
rel_value = rel_base_virtual_addr + addend;
|
rel_value = rel_base_virtual_addr + addend;
|
||||||
rel_is_resolved = true;
|
rel_is_resolved = true;
|
||||||
|
module->SetRelaBit(bit_idx);
|
||||||
break;
|
break;
|
||||||
case R_X86_64_DTPMOD64:
|
case R_X86_64_DTPMOD64:
|
||||||
rel_value = static_cast<u64>(module->tls.modid);
|
rel_value = static_cast<u64>(module->tls.modid);
|
||||||
rel_is_resolved = true;
|
rel_is_resolved = true;
|
||||||
rel_sym_type = Loader::SymbolType::Tls;
|
rel_sym_type = Loader::SymbolType::Tls;
|
||||||
|
module->SetRelaBit(bit_idx);
|
||||||
break;
|
break;
|
||||||
case R_X86_64_GLOB_DAT:
|
case R_X86_64_GLOB_DAT:
|
||||||
case R_X86_64_JUMP_SLOT:
|
case R_X86_64_JUMP_SLOT:
|
||||||
|
@ -343,7 +352,8 @@ void Linker::InitTlsForThread(bool is_primary) {
|
||||||
dtv_table[1].counter = num_dtvs;
|
dtv_table[1].counter = num_dtvs;
|
||||||
|
|
||||||
// Copy init images to TLS thread blocks and map them to DTV slots.
|
// Copy init images to TLS thread blocks and map them to DTV slots.
|
||||||
for (const auto& module : m_modules) {
|
for (u32 i = 0; i < num_static_modules; i++) {
|
||||||
|
auto* module = m_modules[i].get();
|
||||||
if (module->tls.image_size == 0) {
|
if (module->tls.image_size == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,33 @@ namespace Core {
|
||||||
|
|
||||||
struct DynamicModuleInfo;
|
struct DynamicModuleInfo;
|
||||||
class Linker;
|
class Linker;
|
||||||
|
class MemoryManager;
|
||||||
|
|
||||||
|
struct OrbisKernelMemParam {
|
||||||
|
u64 size;
|
||||||
|
u64* extended_page_table;
|
||||||
|
u64* flexible_memory_size;
|
||||||
|
u8* extended_memory_1;
|
||||||
|
u64* extended_gpu_page_table;
|
||||||
|
u8* extended_memory_2;
|
||||||
|
u64* exnteded_cpu_page_table;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct OrbisProcParam {
|
||||||
|
u64 size;
|
||||||
|
u32 magic;
|
||||||
|
u32 entry_count;
|
||||||
|
u64 sdk_version;
|
||||||
|
char* process_name;
|
||||||
|
char* main_thread_name;
|
||||||
|
u32* main_thread_prio;
|
||||||
|
u32* main_thread_stack_size;
|
||||||
|
void* libc_param;
|
||||||
|
OrbisKernelMemParam* mem_param;
|
||||||
|
void* fs_param;
|
||||||
|
u32* process_preload_enable;
|
||||||
|
u64 unknown1;
|
||||||
|
};
|
||||||
|
|
||||||
struct EntryParams {
|
struct EntryParams {
|
||||||
int argc;
|
int argc;
|
||||||
|
@ -30,8 +57,8 @@ public:
|
||||||
return m_hle_symbols;
|
return m_hle_symbols;
|
||||||
}
|
}
|
||||||
|
|
||||||
VAddr GetProcParam() const {
|
OrbisProcParam* GetProcParam() const {
|
||||||
return m_modules[0]->GetProcParam();
|
return m_modules[0]->GetProcParam<OrbisProcParam*>();
|
||||||
}
|
}
|
||||||
|
|
||||||
Module* GetModule(s32 index) const {
|
Module* GetModule(s32 index) const {
|
||||||
|
@ -59,7 +86,7 @@ public:
|
||||||
void* TlsGetAddr(u64 module_index, u64 offset);
|
void* TlsGetAddr(u64 module_index, u64 offset);
|
||||||
void InitTlsForThread(bool is_primary = false);
|
void InitTlsForThread(bool is_primary = false);
|
||||||
|
|
||||||
s32 LoadModule(const std::filesystem::path& elf_name);
|
s32 LoadModule(const std::filesystem::path& elf_name, bool is_dynamic = false);
|
||||||
Module* FindByAddress(VAddr address);
|
Module* FindByAddress(VAddr address);
|
||||||
|
|
||||||
void Relocate(Module* module);
|
void Relocate(Module* module);
|
||||||
|
@ -71,10 +98,12 @@ public:
|
||||||
private:
|
private:
|
||||||
const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l);
|
const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l);
|
||||||
|
|
||||||
|
MemoryManager* memory;
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
u32 dtv_generation_counter{1};
|
u32 dtv_generation_counter{1};
|
||||||
size_t static_tls_size{};
|
size_t static_tls_size{};
|
||||||
u32 max_tls_index{};
|
u32 max_tls_index{};
|
||||||
|
u32 num_static_modules{};
|
||||||
HeapApiFunc heap_api_func{};
|
HeapApiFunc heap_api_func{};
|
||||||
std::vector<std::unique_ptr<Module>> m_modules;
|
std::vector<std::unique_ptr<Module>> m_modules;
|
||||||
Loader::SymbolsResolver m_hle_symbols{};
|
Loader::SymbolsResolver m_hle_symbols{};
|
||||||
|
|
|
@ -84,7 +84,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
|
||||||
MemoryMapFlags flags, VMAType type, std::string_view name,
|
MemoryMapFlags flags, VMAType type, std::string_view name,
|
||||||
bool is_exec, PAddr phys_addr, u64 alignment) {
|
bool is_exec, PAddr phys_addr, u64 alignment) {
|
||||||
std::scoped_lock lk{mutex};
|
std::scoped_lock lk{mutex};
|
||||||
if (type == VMAType::Flexible && total_flexible_usage + size > 448_MB) {
|
if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) {
|
||||||
return SCE_KERNEL_ERROR_ENOMEM;
|
return SCE_KERNEL_ERROR_ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
|
||||||
MapVulkanMemory(mapped_addr, size);
|
MapVulkanMemory(mapped_addr, size);
|
||||||
}
|
}
|
||||||
if (type == VMAType::Flexible) {
|
if (type == VMAType::Flexible) {
|
||||||
total_flexible_usage += size;
|
flexible_usage += size;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -184,7 +184,7 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
|
||||||
UnmapVulkanMemory(virtual_addr, size);
|
UnmapVulkanMemory(virtual_addr, size);
|
||||||
}
|
}
|
||||||
if (type == VMAType::Flexible) {
|
if (type == VMAType::Flexible) {
|
||||||
total_flexible_usage -= size;
|
flexible_usage -= size;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark region as free and attempt to coalesce it with neighbours.
|
// Mark region as free and attempt to coalesce it with neighbours.
|
||||||
|
|
|
@ -124,6 +124,14 @@ public:
|
||||||
instance = instance_;
|
instance = instance_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Replaces the total flexible-memory budget that flexible mappings are checked against.
/// @param size New budget in bytes.
void SetTotalFlexibleSize(u64 size) {
    this->total_flexible_size = size;
}
|
||||||
|
|
||||||
|
/// Returns how many bytes of the flexible-memory budget are still unused.
///
/// Both counters are unsigned, so a plain subtraction would wrap around to an enormous
/// value if the budget were ever set below the amount already in use. The result is
/// therefore clamped to zero in that case.
u64 GetAvailableFlexibleSize() const {
    if (flexible_usage >= total_flexible_size) {
        return 0;
    }
    return total_flexible_size - flexible_usage;
}
|
||||||
|
|
||||||
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
|
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
|
||||||
int memory_type);
|
int memory_type);
|
||||||
|
|
||||||
|
@ -195,7 +203,8 @@ private:
|
||||||
DMemMap dmem_map;
|
DMemMap dmem_map;
|
||||||
VMAMap vma_map;
|
VMAMap vma_map;
|
||||||
std::recursive_mutex mutex;
|
std::recursive_mutex mutex;
|
||||||
size_t total_flexible_usage{};
|
size_t total_flexible_size = 448_MB;
|
||||||
|
size_t flexible_usage{};
|
||||||
|
|
||||||
struct MappedMemory {
|
struct MappedMemory {
|
||||||
vk::UniqueBuffer buffer;
|
vk::UniqueBuffer buffer;
|
||||||
|
|
|
@ -55,8 +55,8 @@ static std::string EncodeId(u64 nVal) {
|
||||||
return enc;
|
return enc;
|
||||||
}
|
}
|
||||||
|
|
||||||
Module::Module(const std::filesystem::path& file_, u32& max_tls_index)
|
Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index)
|
||||||
: file{file_}, name{file.stem().string()} {
|
: memory{memory_}, file{file_}, name{file.stem().string()} {
|
||||||
elf.Open(file);
|
elf.Open(file);
|
||||||
if (elf.IsElfFile()) {
|
if (elf.IsElfFile()) {
|
||||||
LoadModuleToMemory(max_tls_index);
|
LoadModuleToMemory(max_tls_index);
|
||||||
|
@ -84,7 +84,6 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
|
||||||
aligned_base_size = Common::AlignUp(base_size, BlockAlign);
|
aligned_base_size = Common::AlignUp(base_size, BlockAlign);
|
||||||
|
|
||||||
// Map module segments (and possible TLS trampolines)
|
// Map module segments (and possible TLS trampolines)
|
||||||
auto* memory = Core::Memory::Instance();
|
|
||||||
void** out_addr = reinterpret_cast<void**>(&base_virtual_addr);
|
void** out_addr = reinterpret_cast<void**>(&base_virtual_addr);
|
||||||
memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize,
|
memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize,
|
||||||
MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true);
|
MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true);
|
||||||
|
|
|
@ -137,10 +137,12 @@ struct DynamicModuleInfo {
|
||||||
};
|
};
|
||||||
|
|
||||||
using ModuleFunc = int (*)(size_t, const void*);
|
using ModuleFunc = int (*)(size_t, const void*);
|
||||||
|
class MemoryManager;
|
||||||
|
|
||||||
class Module {
|
class Module {
|
||||||
public:
|
public:
|
||||||
explicit Module(const std::filesystem::path& file, u32& max_tls_index);
|
explicit Module(Core::MemoryManager* memory, const std::filesystem::path& file,
|
||||||
|
u32& max_tls_index);
|
||||||
~Module();
|
~Module();
|
||||||
|
|
||||||
VAddr GetBaseAddress() const noexcept {
|
VAddr GetBaseAddress() const noexcept {
|
||||||
|
@ -220,6 +222,7 @@ public:
|
||||||
const LibraryInfo* FindLibrary(std::string_view id);
|
const LibraryInfo* FindLibrary(std::string_view id);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
Core::MemoryManager* memory;
|
||||||
std::filesystem::path file;
|
std::filesystem::path file;
|
||||||
std::string name;
|
std::string name;
|
||||||
Loader::Elf elf;
|
Loader::Elf elf;
|
||||||
|
|
|
@ -54,7 +54,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id of
|
||||||
Id ms) {
|
Id ms) {
|
||||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
|
if (Sirit::ValidId(lod)) {
|
||||||
|
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
|
||||||
|
} else {
|
||||||
|
return ctx.OpImageFetch(ctx.F32[4], image, coords);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
|
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
|
||||||
|
|
|
@ -216,6 +216,14 @@ void Translator::S_AND_B32(const GcnInst& inst) {
|
||||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::S_OR_B32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
const IR::U32 result{ir.BitwiseOr(src0, src1)};
|
||||||
|
SetDst(inst.dst[0], result);
|
||||||
|
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::S_LSHR_B32(const GcnInst& inst) {
|
void Translator::S_LSHR_B32(const GcnInst& inst) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
@ -285,4 +293,36 @@ void Translator::S_BFM_B32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1));
|
SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::S_NOT_B64(const GcnInst& inst) {
|
||||||
|
const auto get_src = [&](const InstOperand& operand) {
|
||||||
|
switch (operand.field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
return ir.GetVcc();
|
||||||
|
case OperandField::ExecLo:
|
||||||
|
return ir.GetExec();
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const IR::U1 src0{get_src(inst.src[0])};
|
||||||
|
const IR::U1 result = ir.LogicalNot(src0);
|
||||||
|
ir.SetScc(result);
|
||||||
|
switch (inst.dst[0].field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
ir.SetVcc(result);
|
||||||
|
break;
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// S_BREV_B32: dst = the bit-reversed value of src0.
void Translator::S_BREV_B32(const GcnInst& inst) {
    const auto& source = inst.src[0];
    const auto& target = inst.dst[0];
    SetDst(target, ir.BitReverse(GetSrc(source)));
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -324,7 +324,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
translator.IMAGE_STORE(inst);
|
translator.IMAGE_STORE(inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::IMAGE_LOAD_MIP:
|
case Opcode::IMAGE_LOAD_MIP:
|
||||||
translator.IMAGE_LOAD_MIP(inst);
|
translator.IMAGE_LOAD(true, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::IMAGE_LOAD:
|
||||||
|
translator.IMAGE_LOAD(false, inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::V_CMP_GE_I32:
|
case Opcode::V_CMP_GE_I32:
|
||||||
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
|
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
|
||||||
|
@ -335,6 +338,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::V_CMP_LE_I32:
|
case Opcode::V_CMP_LE_I32:
|
||||||
translator.V_CMP_U32(ConditionOp::LE, true, false, inst);
|
translator.V_CMP_U32(ConditionOp::LE, true, false, inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_CMP_NE_I32:
|
||||||
|
translator.V_CMP_U32(ConditionOp::LG, true, false, inst);
|
||||||
|
break;
|
||||||
case Opcode::V_CMP_NE_U32:
|
case Opcode::V_CMP_NE_U32:
|
||||||
translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
|
translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
|
||||||
break;
|
break;
|
||||||
|
@ -386,6 +392,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::V_CMP_NLT_F32:
|
case Opcode::V_CMP_NLT_F32:
|
||||||
translator.V_CMP_F32(ConditionOp::GE, false, inst);
|
translator.V_CMP_F32(ConditionOp::GE, false, inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::S_CMP_LT_U32:
|
||||||
|
translator.S_CMP(ConditionOp::LT, false, inst);
|
||||||
|
break;
|
||||||
case Opcode::S_CMP_LG_U32:
|
case Opcode::S_CMP_LG_U32:
|
||||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||||
break;
|
break;
|
||||||
|
@ -585,6 +594,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::S_AND_B64:
|
case Opcode::S_AND_B64:
|
||||||
translator.S_AND_B64(false, inst);
|
translator.S_AND_B64(false, inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::S_NOT_B64:
|
||||||
|
translator.S_NOT_B64(inst);
|
||||||
|
break;
|
||||||
case Opcode::S_NAND_B64:
|
case Opcode::S_NAND_B64:
|
||||||
translator.S_AND_B64(true, inst);
|
translator.S_AND_B64(true, inst);
|
||||||
break;
|
break;
|
||||||
|
@ -627,6 +639,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::S_AND_B32:
|
case Opcode::S_AND_B32:
|
||||||
translator.S_AND_B32(inst);
|
translator.S_AND_B32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::S_OR_B32:
|
||||||
|
translator.S_OR_B32(inst);
|
||||||
|
break;
|
||||||
case Opcode::S_LSHR_B32:
|
case Opcode::S_LSHR_B32:
|
||||||
translator.S_LSHR_B32(inst);
|
translator.S_LSHR_B32(inst);
|
||||||
break;
|
break;
|
||||||
|
@ -657,9 +672,27 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::S_BFM_B32:
|
case Opcode::S_BFM_B32:
|
||||||
translator.S_BFM_B32(inst);
|
translator.S_BFM_B32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_MIN_U32:
|
||||||
|
translator.V_MIN_U32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_CMP_NE_U64:
|
||||||
|
translator.V_CMP_NE_U64(inst);
|
||||||
|
break;
|
||||||
case Opcode::V_TRUNC_F32:
|
case Opcode::V_TRUNC_F32:
|
||||||
translator.V_TRUNC_F32(inst);
|
translator.V_TRUNC_F32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_CEIL_F32:
|
||||||
|
translator.V_CEIL_F32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_BFI_B32:
|
||||||
|
translator.V_BFI_B32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::S_BREV_B32:
|
||||||
|
translator.S_BREV_B32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::S_TTRACEDATA:
|
||||||
|
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||||
|
break;
|
||||||
case Opcode::S_NOP:
|
case Opcode::S_NOP:
|
||||||
case Opcode::S_CBRANCH_EXECZ:
|
case Opcode::S_CBRANCH_EXECZ:
|
||||||
case Opcode::S_CBRANCH_SCC0:
|
case Opcode::S_CBRANCH_SCC0:
|
||||||
|
|
|
@ -45,12 +45,15 @@ public:
|
||||||
void S_AND_B64(bool negate, const GcnInst& inst);
|
void S_AND_B64(bool negate, const GcnInst& inst);
|
||||||
void S_ADD_I32(const GcnInst& inst);
|
void S_ADD_I32(const GcnInst& inst);
|
||||||
void S_AND_B32(const GcnInst& inst);
|
void S_AND_B32(const GcnInst& inst);
|
||||||
|
void S_OR_B32(const GcnInst& inst);
|
||||||
void S_LSHR_B32(const GcnInst& inst);
|
void S_LSHR_B32(const GcnInst& inst);
|
||||||
void S_CSELECT_B32(const GcnInst& inst);
|
void S_CSELECT_B32(const GcnInst& inst);
|
||||||
void S_CSELECT_B64(const GcnInst& inst);
|
void S_CSELECT_B64(const GcnInst& inst);
|
||||||
void S_BFE_U32(const GcnInst& inst);
|
void S_BFE_U32(const GcnInst& inst);
|
||||||
void S_LSHL_B32(const GcnInst& inst);
|
void S_LSHL_B32(const GcnInst& inst);
|
||||||
void S_BFM_B32(const GcnInst& inst);
|
void S_BFM_B32(const GcnInst& inst);
|
||||||
|
void S_NOT_B64(const GcnInst& inst);
|
||||||
|
void S_BREV_B32(const GcnInst& inst);
|
||||||
|
|
||||||
// Scalar Memory
|
// Scalar Memory
|
||||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||||
|
@ -115,6 +118,10 @@ public:
|
||||||
void V_MIN_I32(const GcnInst& inst);
|
void V_MIN_I32(const GcnInst& inst);
|
||||||
void V_MUL_LO_U32(const GcnInst& inst);
|
void V_MUL_LO_U32(const GcnInst& inst);
|
||||||
void V_TRUNC_F32(const GcnInst& inst);
|
void V_TRUNC_F32(const GcnInst& inst);
|
||||||
|
void V_CEIL_F32(const GcnInst& inst);
|
||||||
|
void V_MIN_U32(const GcnInst& inst);
|
||||||
|
void V_CMP_NE_U64(const GcnInst& inst);
|
||||||
|
void V_BFI_B32(const GcnInst& inst);
|
||||||
|
|
||||||
// Vector Memory
|
// Vector Memory
|
||||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||||
|
@ -132,7 +139,7 @@ public:
|
||||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||||
void IMAGE_STORE(const GcnInst& inst);
|
void IMAGE_STORE(const GcnInst& inst);
|
||||||
void IMAGE_LOAD_MIP(const GcnInst& inst);
|
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||||
|
|
||||||
// Export
|
// Export
|
||||||
void EXP(const GcnInst& inst);
|
void EXP(const GcnInst& inst);
|
||||||
|
|
|
@ -430,4 +430,52 @@ void Translator::V_TRUNC_F32(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], ir.FPTrunc(src0));
|
SetDst(inst.dst[0], ir.FPTrunc(src0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_CEIL_F32(const GcnInst& inst) {
|
||||||
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
|
SetDst(inst.dst[0], ir.FPCeil(src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_MIN_U32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
SetDst(inst.dst[0], ir.IMin(src0, src1, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Translates V_CMP_NE_U64 for the single supported form `src0 != 0`.
// The first source is fetched as a 1-bit IR value and forwarded unchanged to
// the destination (VCC or a scalar register); no comparison op is emitted.
// Any other operand kind hits UNREACHABLE().
void Translator::V_CMP_NE_U64(const GcnInst& inst) {
|
||||||
|
// Maps an operand to the 1-bit IR value that stands for it.
const auto get_src = [&](const InstOperand& operand) {
|
||||||
|
switch (operand.field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
return ir.GetVcc();
|
||||||
|
case OperandField::ExecLo:
|
||||||
|
return ir.GetExec();
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||||
|
case OperandField::ConstZero:
|
||||||
|
return ir.Imm1(false);
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const IR::U1 src0{get_src(inst.src[0])};
|
||||||
|
// Only the `src0 != 0` form is handled: the second source must be constant zero.
ASSERT(inst.src[1].field == OperandField::ConstZero); // src0 != 0
|
||||||
|
// NOTE(review): the destination is taken from dst[1], not dst[0] --
// presumably the scalar destination slot of this encoding; confirm with the decoder.
switch (inst.dst[1].field) {
|
||||||
|
case OperandField::VccLo:
|
||||||
|
ir.SetVcc(src0);
|
||||||
|
break;
|
||||||
|
case OperandField::ScalarGPR:
|
||||||
|
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), src0);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Translates V_BFI_B32 (bitfield insert): writes
// (src0 & src1) | (~src0 & src2) to destination operand 0.
// src0 acts as a per-bit mask: set bits take the bit from src1, clear bits
// take the bit from src2.
void Translator::V_BFI_B32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||||
|
SetDst(inst.dst[0],
|
||||||
|
ir.BitwiseOr(ir.BitwiseAnd(src0, src1), ir.BitwiseAnd(ir.BitwiseNot(src0), src2)));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -31,7 +31,9 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
||||||
|
|
||||||
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
const auto& mimg = inst.control.mimg;
|
const auto& mimg = inst.control.mimg;
|
||||||
ASSERT(!mimg.da);
|
if (mimg.da) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
|
||||||
|
}
|
||||||
|
|
||||||
IR::VectorReg addr_reg{inst.src[0].code};
|
IR::VectorReg addr_reg{inst.src[0].code};
|
||||||
IR::VectorReg dest_reg{inst.dst[0].code};
|
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||||
|
@ -107,7 +109,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) {
|
void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
||||||
const auto& mimg = inst.control.mimg;
|
const auto& mimg = inst.control.mimg;
|
||||||
IR::VectorReg addr_reg{inst.src[0].code};
|
IR::VectorReg addr_reg{inst.src[0].code};
|
||||||
IR::VectorReg dest_reg{inst.dst[0].code};
|
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||||
|
@ -119,7 +121,7 @@ void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) {
|
||||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||||
|
|
||||||
IR::TextureInstInfo info{};
|
IR::TextureInstInfo info{};
|
||||||
info.explicit_lod.Assign(1);
|
info.explicit_lod.Assign(has_mip);
|
||||||
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
|
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
|
||||||
|
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
|
|
|
@ -251,7 +251,9 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
||||||
|
|
||||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
|
ASSERT(producer->GetOpcode() ==
|
||||||
|
IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||||
|
producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||||
producer->GetOpcode() == IR::Opcode::GetUserData);
|
producer->GetOpcode() == IR::Opcode::GetUserData);
|
||||||
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
||||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
||||||
|
|
|
@ -827,7 +827,8 @@ struct Liverpool {
|
||||||
PolygonControl polygon_control;
|
PolygonControl polygon_control;
|
||||||
ViewportControl viewport_control;
|
ViewportControl viewport_control;
|
||||||
VsOutputControl vs_output_control;
|
VsOutputControl vs_output_control;
|
||||||
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 1);
|
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 2);
|
||||||
|
u32 index_size;
|
||||||
u32 max_index_size;
|
u32 max_index_size;
|
||||||
IndexBufferType index_buffer_type;
|
IndexBufferType index_buffer_type;
|
||||||
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
||||||
|
@ -993,6 +994,7 @@ static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
|
||||||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
||||||
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
||||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||||
|
|
|
@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||||
pipeline_cache{instance, scheduler, liverpool},
|
pipeline_cache{instance, scheduler, liverpool},
|
||||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 32_MB} {
|
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} {
|
||||||
if (!Config::nullGpu()) {
|
if (!Config::nullGpu()) {
|
||||||
liverpool->BindRasterizer(this);
|
liverpool->BindRasterizer(this);
|
||||||
}
|
}
|
||||||
|
|
|
@ -191,6 +191,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
||||||
case vk::Format::eBc3SrgbBlock:
|
case vk::Format::eBc3SrgbBlock:
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case vk::Format::eBc3UnormBlock:
|
case vk::Format::eBc3UnormBlock:
|
||||||
|
case vk::Format::eBc7SrgbBlock:
|
||||||
return vk::Format::eR32G32B32A32Uint;
|
return vk::Format::eR32G32B32A32Uint;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
@ -225,7 +226,8 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eT
|
||||||
vk::BufferUsageFlagBits::eStorageBuffer;
|
vk::BufferUsageFlagBits::eStorageBuffer;
|
||||||
|
|
||||||
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
||||||
: instance{instance}, scheduler{scheduler}, staging{instance, scheduler, StagingFlags, 64_MB} {
|
: instance{instance}, scheduler{scheduler},
|
||||||
|
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
|
||||||
|
|
||||||
static const std::array detiler_shaders{
|
static const std::array detiler_shaders{
|
||||||
HostShaders::DETILE_M8X1_COMP,
|
HostShaders::DETILE_M8X1_COMP,
|
||||||
|
|
Loading…
Reference in New Issue