Merge pull request #206 from shadps4-emu/shader-again

Add more shader instructions and some memory functions
This commit is contained in:
georgemoralis 2024-06-22 18:19:08 +03:00 committed by GitHub
commit a9cbd8287c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 249 additions and 32 deletions

View File

@ -73,7 +73,7 @@ int PS4_SYSV_ABI sceKernelCloseEventFlag() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) { int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) {
LOG_ERROR(Kernel_Event, "called"); LOG_INFO(Kernel_Event, "called");
ef->Clear(bitPattern); ef->Clear(bitPattern);
return ORBIS_OK; return ORBIS_OK;
} }
@ -177,10 +177,10 @@ int PS4_SYSV_ABI sceKernelWaitEventFlag(OrbisKernelEventFlag ef, u64 bitPattern,
UNREACHABLE(); UNREACHABLE();
} }
auto result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout); u32 result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout);
if (result != ORBIS_OK) { if (result != ORBIS_OK) {
LOG_ERROR(Kernel_Event, "returned {}", result); LOG_ERROR(Kernel_Event, "returned {:#x}", result);
} }
return result; return result;

View File

@ -177,7 +177,7 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg
// Load PRX module and relocate any modules that import it. // Load PRX module and relocate any modules that import it.
auto* linker = Common::Singleton<Core::Linker>::Instance(); auto* linker = Common::Singleton<Core::Linker>::Instance();
u32 handle = linker->LoadModule(path); u32 handle = linker->LoadModule(path, true);
if (handle == -1) { if (handle == -1) {
return ORBIS_KERNEL_ERROR_EINVAL; return ORBIS_KERNEL_ERROR_EINVAL;
} }
@ -298,6 +298,8 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
sceKernelAllocateMainDirectMemory); sceKernelAllocateMainDirectMemory);
LIB_FUNCTION("C0f7TJcbfac", "libkernel", 1, "libkernel", 1, 1, LIB_FUNCTION("C0f7TJcbfac", "libkernel", 1, "libkernel", 1, 1,
sceKernelAvailableDirectMemorySize); sceKernelAvailableDirectMemorySize);
LIB_FUNCTION("hwVSPCmp5tM", "libkernel", 1, "libkernel", 1, 1,
sceKernelCheckedReleaseDirectMemory);
LIB_FUNCTION("rVjRvHJ0X6c", "libkernel", 1, "libkernel", 1, 1, sceKernelVirtualQuery); LIB_FUNCTION("rVjRvHJ0X6c", "libkernel", 1, "libkernel", 1, 1, sceKernelVirtualQuery);
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize); LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory); LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory);
@ -307,6 +309,8 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory); LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap); LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory); LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
LIB_FUNCTION("aNz11fnnzi4", "libkernel", 1, "libkernel", 1, 1,
sceKernelAvailableFlexibleMemorySize);
LIB_FUNCTION("IWIBBdTHit4", "libkernel", 1, "libkernel", 1, 1, sceKernelMapFlexibleMemory); LIB_FUNCTION("IWIBBdTHit4", "libkernel", 1, "libkernel", 1, 1, sceKernelMapFlexibleMemory);
LIB_FUNCTION("p5EcQeEeJAE", "libkernel", 1, "libkernel", 1, 1, LIB_FUNCTION("p5EcQeEeJAE", "libkernel", 1, "libkernel", 1, 1,
_sceKernelRtldSetApplicationHeapAPI); _sceKernelRtldSetApplicationHeapAPI);

View File

@ -173,6 +173,13 @@ int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInf
return memory->DirectMemoryQuery(offset, flags == 1, query_info); return memory->DirectMemoryQuery(offset, flags == 1, query_info);
} }
// Reports the amount of flexible memory that can still be mapped, as tracked by the
// memory manager. The value is written to *out_size and logged; the call always
// returns ORBIS_OK. out_size is dereferenced unconditionally.
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) {
    *out_size = Core::Memory::Instance()->GetAvailableFlexibleSize();
    LOG_INFO(Kernel_Vmm, "called size = {:#x}", *out_size);
    return ORBIS_OK;
}
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) { void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
auto* linker = Common::Singleton<Core::Linker>::Instance(); auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->SetHeapApiFunc(func); linker->SetHeapApiFunc(func);

View File

@ -78,6 +78,7 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void**
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
size_t infoSize); size_t infoSize);
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut);
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func); void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

View File

@ -1276,6 +1276,10 @@ int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) {
return pthread_once(reinterpret_cast<pthread_once_t*>(once_control), init_routine); return pthread_once(reinterpret_cast<pthread_once_t*>(once_control), init_routine);
} }
// Terminates the calling thread by forwarding value_ptr to pthread_exit.
// Control never returns to the caller, hence the [[noreturn]] attribute.
[[noreturn]] void PS4_SYSV_ABI scePthreadExit(void* value_ptr) {
pthread_exit(value_ptr);
}
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate); LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate);
LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init); LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
@ -1293,6 +1297,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("onNY9Byn-W8", "libkernel", 1, "libkernel", 1, 1, scePthreadJoin); LIB_FUNCTION("onNY9Byn-W8", "libkernel", 1, "libkernel", 1, 1, scePthreadJoin);
LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach); LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach);
LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual); LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual);
LIB_FUNCTION("3kg7rT0NQIs", "libkernel", 1, "libkernel", 1, 1, scePthreadExit);
LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal); LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal);
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join); LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);

View File

@ -56,10 +56,20 @@ int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) {
} else { } else {
std::this_thread::sleep_for(std::chrono::microseconds(microseconds)); std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
} }
#else
usleep(microseconds);
#endif
return 0; return 0;
#else
timespec start;
timespec remain;
start.tv_sec = microseconds / 1000000;
start.tv_nsec = (microseconds % 1000000) * 1000;
timespec* requested = &start;
int ret = 0;
do {
ret = nanosleep(requested, &remain);
requested = &remain;
} while (ret != 0);
return ret;
#endif
} }
int PS4_SYSV_ABI posix_usleep(u32 microseconds) { int PS4_SYSV_ABI posix_usleep(u32 microseconds) {

View File

@ -13,6 +13,7 @@
#include "core/libraries/kernel/memory_management.h" #include "core/libraries/kernel/memory_management.h"
#include "core/libraries/kernel/thread_management.h" #include "core/libraries/kernel/thread_management.h"
#include "core/linker.h" #include "core/linker.h"
#include "core/memory.h"
#include "core/tls.h" #include "core/tls.h"
#include "core/virtual_memory.h" #include "core/virtual_memory.h"
@ -46,7 +47,7 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
: "rax", "rsi", "rdi"); : "rax", "rsi", "rdi");
} }
Linker::Linker() = default; Linker::Linker() : memory{Memory::Instance()} {}
Linker::~Linker() = default; Linker::~Linker() = default;
@ -66,6 +67,11 @@ void Linker::Execute() {
Relocate(m.get()); Relocate(m.get());
} }
// Configure used flexible memory size.
if (u64* flexible_size = GetProcParam()->mem_param->flexible_memory_size) {
memory->SetTotalFlexibleSize(*flexible_size);
}
// Init primary thread. // Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread"); Common::SetCurrentThreadName("GAME_MainThread");
Libraries::Kernel::pthreadInitSelfMainThread(); Libraries::Kernel::pthreadInitSelfMainThread();
@ -90,7 +96,7 @@ void Linker::Execute() {
} }
} }
s32 Linker::LoadModule(const std::filesystem::path& elf_name) { s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {
std::scoped_lock lk{mutex}; std::scoped_lock lk{mutex};
if (!std::filesystem::exists(elf_name)) { if (!std::filesystem::exists(elf_name)) {
@ -98,12 +104,13 @@ s32 Linker::LoadModule(const std::filesystem::path& elf_name) {
return -1; return -1;
} }
auto module = std::make_unique<Module>(elf_name, max_tls_index); auto module = std::make_unique<Module>(memory, elf_name, max_tls_index);
if (!module->IsValid()) { if (!module->IsValid()) {
LOG_ERROR(Core_Linker, "Provided file {} is not valid ELF file", elf_name.string()); LOG_ERROR(Core_Linker, "Provided file {} is not valid ELF file", elf_name.string());
return -1; return -1;
} }
num_static_modules += !is_dynamic;
m_modules.emplace_back(std::move(module)); m_modules.emplace_back(std::move(module));
return m_modules.size() - 1; return m_modules.size() - 1;
} }
@ -143,11 +150,13 @@ void Linker::Relocate(Module* module) {
case R_X86_64_RELATIVE: case R_X86_64_RELATIVE:
rel_value = rel_base_virtual_addr + addend; rel_value = rel_base_virtual_addr + addend;
rel_is_resolved = true; rel_is_resolved = true;
module->SetRelaBit(bit_idx);
break; break;
case R_X86_64_DTPMOD64: case R_X86_64_DTPMOD64:
rel_value = static_cast<u64>(module->tls.modid); rel_value = static_cast<u64>(module->tls.modid);
rel_is_resolved = true; rel_is_resolved = true;
rel_sym_type = Loader::SymbolType::Tls; rel_sym_type = Loader::SymbolType::Tls;
module->SetRelaBit(bit_idx);
break; break;
case R_X86_64_GLOB_DAT: case R_X86_64_GLOB_DAT:
case R_X86_64_JUMP_SLOT: case R_X86_64_JUMP_SLOT:
@ -343,7 +352,8 @@ void Linker::InitTlsForThread(bool is_primary) {
dtv_table[1].counter = num_dtvs; dtv_table[1].counter = num_dtvs;
// Copy init images to TLS thread blocks and map them to DTV slots. // Copy init images to TLS thread blocks and map them to DTV slots.
for (const auto& module : m_modules) { for (u32 i = 0; i < num_static_modules; i++) {
auto* module = m_modules[i].get();
if (module->tls.image_size == 0) { if (module->tls.image_size == 0) {
continue; continue;
} }

View File

@ -12,6 +12,33 @@ namespace Core {
struct DynamicModuleInfo; struct DynamicModuleInfo;
class Linker; class Linker;
class MemoryManager;
// Memory parameters referenced from OrbisProcParam::mem_param.
// Linker::Execute reads flexible_memory_size and, when the pointer is non-null,
// passes the pointed-to value to MemoryManager::SetTotalFlexibleSize.
// NOTE(review): presumably mirrors a structure laid out by the guest application,
// so field order and sizes should not be changed — confirm before editing.
struct OrbisKernelMemParam {
u64 size;
u64* extended_page_table;
// May be null; when set, points to the flexible memory size requested by the title.
u64* flexible_memory_size;
u8* extended_memory_1;
u64* extended_gpu_page_table;
u8* extended_memory_2;
// NOTE: "exnteded" is a misspelling of "extended"; kept as-is because renaming the
// field would break any code that already refers to it.
u64* exnteded_cpu_page_table;
};
// Process parameter block of the main module. Linker::GetProcParam() returns a pointer
// to it by asking the first loaded module for its proc param as OrbisProcParam*.
// NOTE(review): presumably mirrors a structure laid out by the guest application,
// so field order and sizes should not be changed — confirm before editing.
struct OrbisProcParam {
u64 size;
u32 magic;
u32 entry_count;
u64 sdk_version;
char* process_name;
char* main_thread_name;
u32* main_thread_prio;
u32* main_thread_stack_size;
void* libc_param;
// Dereferenced by Linker::Execute to look up the flexible memory size.
OrbisKernelMemParam* mem_param;
void* fs_param;
u32* process_preload_enable;
u64 unknown1;
};
struct EntryParams { struct EntryParams {
int argc; int argc;
@ -30,8 +57,8 @@ public:
return m_hle_symbols; return m_hle_symbols;
} }
VAddr GetProcParam() const { OrbisProcParam* GetProcParam() const {
return m_modules[0]->GetProcParam(); return m_modules[0]->GetProcParam<OrbisProcParam*>();
} }
Module* GetModule(s32 index) const { Module* GetModule(s32 index) const {
@ -59,7 +86,7 @@ public:
void* TlsGetAddr(u64 module_index, u64 offset); void* TlsGetAddr(u64 module_index, u64 offset);
void InitTlsForThread(bool is_primary = false); void InitTlsForThread(bool is_primary = false);
s32 LoadModule(const std::filesystem::path& elf_name); s32 LoadModule(const std::filesystem::path& elf_name, bool is_dynamic = false);
Module* FindByAddress(VAddr address); Module* FindByAddress(VAddr address);
void Relocate(Module* module); void Relocate(Module* module);
@ -71,10 +98,12 @@ public:
private: private:
const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l); const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l);
MemoryManager* memory;
std::mutex mutex; std::mutex mutex;
u32 dtv_generation_counter{1}; u32 dtv_generation_counter{1};
size_t static_tls_size{}; size_t static_tls_size{};
u32 max_tls_index{}; u32 max_tls_index{};
u32 num_static_modules{};
HeapApiFunc heap_api_func{}; HeapApiFunc heap_api_func{};
std::vector<std::unique_ptr<Module>> m_modules; std::vector<std::unique_ptr<Module>> m_modules;
Loader::SymbolsResolver m_hle_symbols{}; Loader::SymbolsResolver m_hle_symbols{};

View File

@ -84,7 +84,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
MemoryMapFlags flags, VMAType type, std::string_view name, MemoryMapFlags flags, VMAType type, std::string_view name,
bool is_exec, PAddr phys_addr, u64 alignment) { bool is_exec, PAddr phys_addr, u64 alignment) {
std::scoped_lock lk{mutex}; std::scoped_lock lk{mutex};
if (type == VMAType::Flexible && total_flexible_usage + size > 448_MB) { if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) {
return SCE_KERNEL_ERROR_ENOMEM; return SCE_KERNEL_ERROR_ENOMEM;
} }
@ -106,7 +106,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
MapVulkanMemory(mapped_addr, size); MapVulkanMemory(mapped_addr, size);
} }
if (type == VMAType::Flexible) { if (type == VMAType::Flexible) {
total_flexible_usage += size; flexible_usage += size;
} }
}; };
@ -184,7 +184,7 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
UnmapVulkanMemory(virtual_addr, size); UnmapVulkanMemory(virtual_addr, size);
} }
if (type == VMAType::Flexible) { if (type == VMAType::Flexible) {
total_flexible_usage -= size; flexible_usage -= size;
} }
// Mark region as free and attempt to coalesce it with neighbours. // Mark region as free and attempt to coalesce it with neighbours.

View File

@ -124,6 +124,14 @@ public:
instance = instance_; instance = instance_;
} }
// Overrides the total flexible memory budget (the member defaults to 448_MB).
// MapMemory rejects flexible mappings that would push usage past this value.
void SetTotalFlexibleSize(u64 size) {
total_flexible_size = size;
}
// Returns how many bytes of the flexible memory budget are still unused.
// Both counters are unsigned, so a plain subtraction would wrap around to a huge
// value if the budget were ever set below the amount already in use; report 0
// in that case instead.
u64 GetAvailableFlexibleSize() const {
    if (flexible_usage >= total_flexible_size) {
        return 0;
    }
    return total_flexible_size - flexible_usage;
}
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type); int memory_type);
@ -195,7 +203,8 @@ private:
DMemMap dmem_map; DMemMap dmem_map;
VMAMap vma_map; VMAMap vma_map;
std::recursive_mutex mutex; std::recursive_mutex mutex;
size_t total_flexible_usage{}; size_t total_flexible_size = 448_MB;
size_t flexible_usage{};
struct MappedMemory { struct MappedMemory {
vk::UniqueBuffer buffer; vk::UniqueBuffer buffer;

View File

@ -55,8 +55,8 @@ static std::string EncodeId(u64 nVal) {
return enc; return enc;
} }
Module::Module(const std::filesystem::path& file_, u32& max_tls_index) Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index)
: file{file_}, name{file.stem().string()} { : memory{memory_}, file{file_}, name{file.stem().string()} {
elf.Open(file); elf.Open(file);
if (elf.IsElfFile()) { if (elf.IsElfFile()) {
LoadModuleToMemory(max_tls_index); LoadModuleToMemory(max_tls_index);
@ -84,7 +84,6 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
aligned_base_size = Common::AlignUp(base_size, BlockAlign); aligned_base_size = Common::AlignUp(base_size, BlockAlign);
// Map module segments (and possible TLS trampolines) // Map module segments (and possible TLS trampolines)
auto* memory = Core::Memory::Instance();
void** out_addr = reinterpret_cast<void**>(&base_virtual_addr); void** out_addr = reinterpret_cast<void**>(&base_virtual_addr);
memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize, memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize,
MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true); MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true);

View File

@ -137,10 +137,12 @@ struct DynamicModuleInfo {
}; };
using ModuleFunc = int (*)(size_t, const void*); using ModuleFunc = int (*)(size_t, const void*);
class MemoryManager;
class Module { class Module {
public: public:
explicit Module(const std::filesystem::path& file, u32& max_tls_index); explicit Module(Core::MemoryManager* memory, const std::filesystem::path& file,
u32& max_tls_index);
~Module(); ~Module();
VAddr GetBaseAddress() const noexcept { VAddr GetBaseAddress() const noexcept {
@ -220,6 +222,7 @@ public:
const LibraryInfo* FindLibrary(std::string_view id); const LibraryInfo* FindLibrary(std::string_view id);
public: public:
Core::MemoryManager* memory;
std::filesystem::path file; std::filesystem::path file;
std::string name; std::string name;
Loader::Elf elf; Loader::Elf elf;

View File

@ -54,7 +54,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id of
Id ms) { Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id image = ctx.OpLoad(texture.image_type, texture.id);
if (Sirit::ValidId(lod)) {
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod); return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
} else {
return ctx.OpImageFetch(ctx.F32[4], image, coords);
}
} }
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,

View File

@ -216,6 +216,14 @@ void Translator::S_AND_B32(const GcnInst& inst) {
ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
} }
void Translator::S_OR_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 result{ir.BitwiseOr(src0, src1)};
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
void Translator::S_LSHR_B32(const GcnInst& inst) { void Translator::S_LSHR_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src1{GetSrc(inst.src[1])};
@ -285,4 +293,36 @@ void Translator::S_BFM_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1)); SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1));
} }
void Translator::S_NOT_B64(const GcnInst& inst) {
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
return ir.GetVcc();
case OperandField::ExecLo:
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
default:
UNREACHABLE();
}
};
const IR::U1 src0{get_src(inst.src[0])};
const IR::U1 result = ir.LogicalNot(src0);
ir.SetScc(result);
switch (inst.dst[0].field) {
case OperandField::VccLo:
ir.SetVcc(result);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
default:
UNREACHABLE();
}
}
void Translator::S_BREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -324,7 +324,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.IMAGE_STORE(inst); translator.IMAGE_STORE(inst);
break; break;
case Opcode::IMAGE_LOAD_MIP: case Opcode::IMAGE_LOAD_MIP:
translator.IMAGE_LOAD_MIP(inst); translator.IMAGE_LOAD(true, inst);
break;
case Opcode::IMAGE_LOAD:
translator.IMAGE_LOAD(false, inst);
break; break;
case Opcode::V_CMP_GE_I32: case Opcode::V_CMP_GE_I32:
translator.V_CMP_U32(ConditionOp::GE, true, false, inst); translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
@ -335,6 +338,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_LE_I32: case Opcode::V_CMP_LE_I32:
translator.V_CMP_U32(ConditionOp::LE, true, false, inst); translator.V_CMP_U32(ConditionOp::LE, true, false, inst);
break; break;
case Opcode::V_CMP_NE_I32:
translator.V_CMP_U32(ConditionOp::LG, true, false, inst);
break;
case Opcode::V_CMP_NE_U32: case Opcode::V_CMP_NE_U32:
translator.V_CMP_U32(ConditionOp::LG, false, false, inst); translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
break; break;
@ -386,6 +392,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NLT_F32: case Opcode::V_CMP_NLT_F32:
translator.V_CMP_F32(ConditionOp::GE, false, inst); translator.V_CMP_F32(ConditionOp::GE, false, inst);
break; break;
case Opcode::S_CMP_LT_U32:
translator.S_CMP(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LG_U32: case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst); translator.S_CMP(ConditionOp::LG, false, inst);
break; break;
@ -585,6 +594,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_AND_B64: case Opcode::S_AND_B64:
translator.S_AND_B64(false, inst); translator.S_AND_B64(false, inst);
break; break;
case Opcode::S_NOT_B64:
translator.S_NOT_B64(inst);
break;
case Opcode::S_NAND_B64: case Opcode::S_NAND_B64:
translator.S_AND_B64(true, inst); translator.S_AND_B64(true, inst);
break; break;
@ -627,6 +639,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_AND_B32: case Opcode::S_AND_B32:
translator.S_AND_B32(inst); translator.S_AND_B32(inst);
break; break;
case Opcode::S_OR_B32:
translator.S_OR_B32(inst);
break;
case Opcode::S_LSHR_B32: case Opcode::S_LSHR_B32:
translator.S_LSHR_B32(inst); translator.S_LSHR_B32(inst);
break; break;
@ -657,9 +672,27 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_BFM_B32: case Opcode::S_BFM_B32:
translator.S_BFM_B32(inst); translator.S_BFM_B32(inst);
break; break;
case Opcode::V_MIN_U32:
translator.V_MIN_U32(inst);
break;
case Opcode::V_CMP_NE_U64:
translator.V_CMP_NE_U64(inst);
break;
case Opcode::V_TRUNC_F32: case Opcode::V_TRUNC_F32:
translator.V_TRUNC_F32(inst); translator.V_TRUNC_F32(inst);
break; break;
case Opcode::V_CEIL_F32:
translator.V_CEIL_F32(inst);
break;
case Opcode::V_BFI_B32:
translator.V_BFI_B32(inst);
break;
case Opcode::S_BREV_B32:
translator.S_BREV_B32(inst);
break;
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
break;
case Opcode::S_NOP: case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0: case Opcode::S_CBRANCH_SCC0:

View File

@ -45,12 +45,15 @@ public:
void S_AND_B64(bool negate, const GcnInst& inst); void S_AND_B64(bool negate, const GcnInst& inst);
void S_ADD_I32(const GcnInst& inst); void S_ADD_I32(const GcnInst& inst);
void S_AND_B32(const GcnInst& inst); void S_AND_B32(const GcnInst& inst);
void S_OR_B32(const GcnInst& inst);
void S_LSHR_B32(const GcnInst& inst); void S_LSHR_B32(const GcnInst& inst);
void S_CSELECT_B32(const GcnInst& inst); void S_CSELECT_B32(const GcnInst& inst);
void S_CSELECT_B64(const GcnInst& inst); void S_CSELECT_B64(const GcnInst& inst);
void S_BFE_U32(const GcnInst& inst); void S_BFE_U32(const GcnInst& inst);
void S_LSHL_B32(const GcnInst& inst); void S_LSHL_B32(const GcnInst& inst);
void S_BFM_B32(const GcnInst& inst); void S_BFM_B32(const GcnInst& inst);
void S_NOT_B64(const GcnInst& inst);
void S_BREV_B32(const GcnInst& inst);
// Scalar Memory // Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -115,6 +118,10 @@ public:
void V_MIN_I32(const GcnInst& inst); void V_MIN_I32(const GcnInst& inst);
void V_MUL_LO_U32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst);
void V_TRUNC_F32(const GcnInst& inst); void V_TRUNC_F32(const GcnInst& inst);
void V_CEIL_F32(const GcnInst& inst);
void V_MIN_U32(const GcnInst& inst);
void V_CMP_NE_U64(const GcnInst& inst);
void V_BFI_B32(const GcnInst& inst);
// Vector Memory // Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
@ -132,7 +139,7 @@ public:
void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_GET_RESINFO(const GcnInst& inst);
void IMAGE_SAMPLE(const GcnInst& inst); void IMAGE_SAMPLE(const GcnInst& inst);
void IMAGE_STORE(const GcnInst& inst); void IMAGE_STORE(const GcnInst& inst);
void IMAGE_LOAD_MIP(const GcnInst& inst); void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
// Export // Export
void EXP(const GcnInst& inst); void EXP(const GcnInst& inst);

View File

@ -430,4 +430,52 @@ void Translator::V_TRUNC_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPTrunc(src0)); SetDst(inst.dst[0], ir.FPTrunc(src0));
} }
void Translator::V_CEIL_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPCeil(src0));
}
void Translator::V_MIN_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IMin(src0, src1, false));
}
void Translator::V_CMP_NE_U64(const GcnInst& inst) {
const auto get_src = [&](const InstOperand& operand) {
switch (operand.field) {
case OperandField::VccLo:
return ir.GetVcc();
case OperandField::ExecLo:
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
case OperandField::ConstZero:
return ir.Imm1(false);
default:
UNREACHABLE();
}
};
const IR::U1 src0{get_src(inst.src[0])};
ASSERT(inst.src[1].field == OperandField::ConstZero); // src0 != 0
switch (inst.dst[1].field) {
case OperandField::VccLo:
ir.SetVcc(src0);
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), src0);
break;
default:
UNREACHABLE();
}
}
void Translator::V_BFI_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0],
ir.BitwiseOr(ir.BitwiseAnd(src0, src1), ir.BitwiseAnd(ir.BitwiseNot(src0), src2)));
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -31,7 +31,9 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
void Translator::IMAGE_SAMPLE(const GcnInst& inst) { void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
const auto& mimg = inst.control.mimg; const auto& mimg = inst.control.mimg;
ASSERT(!mimg.da); if (mimg.da) {
LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
}
IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg dest_reg{inst.dst[0].code}; IR::VectorReg dest_reg{inst.dst[0].code};
@ -107,7 +109,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
} }
} }
void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) { void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
const auto& mimg = inst.control.mimg; const auto& mimg = inst.control.mimg;
IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg dest_reg{inst.dst[0].code}; IR::VectorReg dest_reg{inst.dst[0].code};
@ -119,7 +121,7 @@ void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) {
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
IR::TextureInstInfo info{}; IR::TextureInstInfo info{};
info.explicit_lod.Assign(1); info.explicit_lod.Assign(has_mip);
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info); const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {

View File

@ -251,7 +251,9 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
IR::Inst* producer = inst.Arg(0).InstRecursive(); IR::Inst* producer = inst.Arg(0).InstRecursive();
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || ASSERT(producer->GetOpcode() ==
IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
producer->GetOpcode() == IR::Opcode::GetUserData); producer->GetOpcode() == IR::Opcode::GetUserData);
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> { const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) { if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {

View File

@ -827,7 +827,8 @@ struct Liverpool {
PolygonControl polygon_control; PolygonControl polygon_control;
ViewportControl viewport_control; ViewportControl viewport_control;
VsOutputControl vs_output_control; VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 1); INSERT_PADDING_WORDS(0xA29E - 0xA207 - 2);
u32 index_size;
u32 max_index_size; u32 max_index_size;
IndexBufferType index_buffer_type; IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2); INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
@ -993,6 +994,7 @@ static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);

View File

@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()}, liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool}, pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 32_MB} { vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} {
if (!Config::nullGpu()) { if (!Config::nullGpu()) {
liverpool->BindRasterizer(this); liverpool->BindRasterizer(this);
} }

View File

@ -191,6 +191,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eBc3SrgbBlock: case vk::Format::eBc3SrgbBlock:
[[fallthrough]]; [[fallthrough]];
case vk::Format::eBc3UnormBlock: case vk::Format::eBc3UnormBlock:
case vk::Format::eBc7SrgbBlock:
return vk::Format::eR32G32B32A32Uint; return vk::Format::eR32G32B32A32Uint;
default: default:
break; break;
@ -225,7 +226,8 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eT
vk::BufferUsageFlagBits::eStorageBuffer; vk::BufferUsageFlagBits::eStorageBuffer;
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
: instance{instance}, scheduler{scheduler}, staging{instance, scheduler, StagingFlags, 64_MB} { : instance{instance}, scheduler{scheduler},
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
static const std::array detiler_shaders{ static const std::array detiler_shaders{
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X1_COMP,