Merge pull request #206 from shadps4-emu/shader-again
Add more shader instructions and some memory functions
This commit is contained in:
commit
a9cbd8287c
|
@ -73,7 +73,7 @@ int PS4_SYSV_ABI sceKernelCloseEventFlag() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
/**
 * Forwards a clear request to an event flag object.
 *
 * @param ef         Event flag handle. It is dereferenced unconditionally, so the caller
 *                   must pass a valid object.
 * @param bitPattern Pattern handed unchanged to the flag's Clear() method.
 * @return ORBIS_OK in every case.
 */
int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) {
    // Trace the call once, at info severity: a routine invocation is not an error and
    // must not be reported twice.
    LOG_INFO(Kernel_Event, "called");
    ef->Clear(bitPattern);
    return ORBIS_OK;
}
|
||||
|
@ -177,10 +177,10 @@ int PS4_SYSV_ABI sceKernelWaitEventFlag(OrbisKernelEventFlag ef, u64 bitPattern,
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
auto result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout);
|
||||
u32 result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout);
|
||||
|
||||
if (result != ORBIS_OK) {
|
||||
LOG_ERROR(Kernel_Event, "returned {}", result);
|
||||
LOG_ERROR(Kernel_Event, "returned {:#x}", result);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
@ -177,7 +177,7 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg
|
|||
|
||||
// Load PRX module and relocate any modules that import it.
|
||||
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
||||
u32 handle = linker->LoadModule(path);
|
||||
u32 handle = linker->LoadModule(path, true);
|
||||
if (handle == -1) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
@ -298,6 +298,8 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
|||
sceKernelAllocateMainDirectMemory);
|
||||
LIB_FUNCTION("C0f7TJcbfac", "libkernel", 1, "libkernel", 1, 1,
|
||||
sceKernelAvailableDirectMemorySize);
|
||||
LIB_FUNCTION("hwVSPCmp5tM", "libkernel", 1, "libkernel", 1, 1,
|
||||
sceKernelCheckedReleaseDirectMemory);
|
||||
LIB_FUNCTION("rVjRvHJ0X6c", "libkernel", 1, "libkernel", 1, 1, sceKernelVirtualQuery);
|
||||
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
|
||||
LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory);
|
||||
|
@ -307,6 +309,8 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
|
||||
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
|
||||
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
|
||||
LIB_FUNCTION("aNz11fnnzi4", "libkernel", 1, "libkernel", 1, 1,
|
||||
sceKernelAvailableFlexibleMemorySize);
|
||||
LIB_FUNCTION("IWIBBdTHit4", "libkernel", 1, "libkernel", 1, 1, sceKernelMapFlexibleMemory);
|
||||
LIB_FUNCTION("p5EcQeEeJAE", "libkernel", 1, "libkernel", 1, 1,
|
||||
_sceKernelRtldSetApplicationHeapAPI);
|
||||
|
|
|
@ -173,6 +173,13 @@ int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInf
|
|||
return memory->DirectMemoryQuery(offset, flags == 1, query_info);
|
||||
}
|
||||
|
||||
/**
 * Reports the amount of flexible memory that is still available.
 *
 * @param out_size Receives the value returned by the memory manager's
 *                 GetAvailableFlexibleSize(). Written unconditionally, so it must not be null.
 * @return ORBIS_OK in every case.
 */
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) {
    const auto* const memory_manager = Core::Memory::Instance();
    *out_size = memory_manager->GetAvailableFlexibleSize();

    // Log the stored value (not the manager's return value) so the trace shows exactly
    // what the caller receives.
    LOG_INFO(Kernel_Vmm, "called size = {:#x}", *out_size);
    return ORBIS_OK;
}
|
||||
|
||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
|
||||
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
||||
linker->SetHeapApiFunc(func);
|
||||
|
|
|
@ -78,6 +78,7 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void**
|
|||
|
||||
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
|
||||
size_t infoSize);
|
||||
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut);
|
||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
|
||||
|
||||
} // namespace Libraries::Kernel
|
||||
|
|
|
@ -1276,6 +1276,10 @@ int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) {
|
|||
return pthread_once(reinterpret_cast<pthread_once_t*>(once_control), init_routine);
|
||||
}
|
||||
|
||||
/**
 * Ends the calling thread by delegating to the host pthread_exit().
 *
 * @param value_ptr Exit value passed through unchanged to pthread_exit().
 *
 * Never returns to the caller.
 */
[[noreturn]] void PS4_SYSV_ABI scePthreadExit(void* value_ptr) {
    pthread_exit(value_ptr);
}
|
||||
|
||||
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
||||
LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate);
|
||||
LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init);
|
||||
|
@ -1293,6 +1297,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("onNY9Byn-W8", "libkernel", 1, "libkernel", 1, 1, scePthreadJoin);
|
||||
LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach);
|
||||
LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual);
|
||||
LIB_FUNCTION("3kg7rT0NQIs", "libkernel", 1, "libkernel", 1, 1, scePthreadExit);
|
||||
LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal);
|
||||
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
|
||||
|
||||
|
|
|
@ -56,10 +56,20 @@ int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) {
|
|||
} else {
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
|
||||
}
|
||||
#else
|
||||
usleep(microseconds);
|
||||
#endif
|
||||
return 0;
|
||||
#else
|
||||
timespec start;
|
||||
timespec remain;
|
||||
start.tv_sec = microseconds / 1000000;
|
||||
start.tv_nsec = (microseconds % 1000000) * 1000;
|
||||
timespec* requested = &start;
|
||||
int ret = 0;
|
||||
do {
|
||||
ret = nanosleep(requested, &remain);
|
||||
requested = &remain;
|
||||
} while (ret != 0);
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI posix_usleep(u32 microseconds) {
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "core/libraries/kernel/memory_management.h"
|
||||
#include "core/libraries/kernel/thread_management.h"
|
||||
#include "core/linker.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/tls.h"
|
||||
#include "core/virtual_memory.h"
|
||||
|
||||
|
@ -46,7 +47,7 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
|
|||
: "rax", "rsi", "rdi");
|
||||
}
|
||||
|
||||
// Single constructor definition: capture the memory manager instance up front, because
// Execute() and LoadModule() dereference `memory` without any further initialisation.
Linker::Linker() : memory{Memory::Instance()} {}
|
||||
|
||||
Linker::~Linker() = default;
|
||||
|
||||
|
@ -66,6 +67,11 @@ void Linker::Execute() {
|
|||
Relocate(m.get());
|
||||
}
|
||||
|
||||
// Configure used flexible memory size.
|
||||
if (u64* flexible_size = GetProcParam()->mem_param->flexible_memory_size) {
|
||||
memory->SetTotalFlexibleSize(*flexible_size);
|
||||
}
|
||||
|
||||
// Init primary thread.
|
||||
Common::SetCurrentThreadName("GAME_MainThread");
|
||||
Libraries::Kernel::pthreadInitSelfMainThread();
|
||||
|
@ -90,7 +96,7 @@ void Linker::Execute() {
|
|||
}
|
||||
}
|
||||
|
||||
s32 Linker::LoadModule(const std::filesystem::path& elf_name) {
|
||||
s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {
|
||||
std::scoped_lock lk{mutex};
|
||||
|
||||
if (!std::filesystem::exists(elf_name)) {
|
||||
|
@ -98,12 +104,13 @@ s32 Linker::LoadModule(const std::filesystem::path& elf_name) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
auto module = std::make_unique<Module>(elf_name, max_tls_index);
|
||||
auto module = std::make_unique<Module>(memory, elf_name, max_tls_index);
|
||||
if (!module->IsValid()) {
|
||||
LOG_ERROR(Core_Linker, "Provided file {} is not valid ELF file", elf_name.string());
|
||||
return -1;
|
||||
}
|
||||
|
||||
num_static_modules += !is_dynamic;
|
||||
m_modules.emplace_back(std::move(module));
|
||||
return m_modules.size() - 1;
|
||||
}
|
||||
|
@ -143,11 +150,13 @@ void Linker::Relocate(Module* module) {
|
|||
case R_X86_64_RELATIVE:
|
||||
rel_value = rel_base_virtual_addr + addend;
|
||||
rel_is_resolved = true;
|
||||
module->SetRelaBit(bit_idx);
|
||||
break;
|
||||
case R_X86_64_DTPMOD64:
|
||||
rel_value = static_cast<u64>(module->tls.modid);
|
||||
rel_is_resolved = true;
|
||||
rel_sym_type = Loader::SymbolType::Tls;
|
||||
module->SetRelaBit(bit_idx);
|
||||
break;
|
||||
case R_X86_64_GLOB_DAT:
|
||||
case R_X86_64_JUMP_SLOT:
|
||||
|
@ -343,7 +352,8 @@ void Linker::InitTlsForThread(bool is_primary) {
|
|||
dtv_table[1].counter = num_dtvs;
|
||||
|
||||
// Copy init images to TLS thread blocks and map them to DTV slots.
|
||||
for (const auto& module : m_modules) {
|
||||
for (u32 i = 0; i < num_static_modules; i++) {
|
||||
auto* module = m_modules[i].get();
|
||||
if (module->tls.image_size == 0) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,33 @@ namespace Core {
|
|||
|
||||
struct DynamicModuleInfo;
|
||||
class Linker;
|
||||
class MemoryManager;
|
||||
|
||||
struct OrbisKernelMemParam {
|
||||
u64 size;
|
||||
u64* extended_page_table;
|
||||
u64* flexible_memory_size;
|
||||
u8* extended_memory_1;
|
||||
u64* extended_gpu_page_table;
|
||||
u8* extended_memory_2;
|
||||
u64* exnteded_cpu_page_table;
|
||||
};
|
||||
|
||||
struct OrbisProcParam {
|
||||
u64 size;
|
||||
u32 magic;
|
||||
u32 entry_count;
|
||||
u64 sdk_version;
|
||||
char* process_name;
|
||||
char* main_thread_name;
|
||||
u32* main_thread_prio;
|
||||
u32* main_thread_stack_size;
|
||||
void* libc_param;
|
||||
OrbisKernelMemParam* mem_param;
|
||||
void* fs_param;
|
||||
u32* process_preload_enable;
|
||||
u64 unknown1;
|
||||
};
|
||||
|
||||
struct EntryParams {
|
||||
int argc;
|
||||
|
@ -30,8 +57,8 @@ public:
|
|||
return m_hle_symbols;
|
||||
}
|
||||
|
||||
VAddr GetProcParam() const {
|
||||
return m_modules[0]->GetProcParam();
|
||||
OrbisProcParam* GetProcParam() const {
|
||||
return m_modules[0]->GetProcParam<OrbisProcParam*>();
|
||||
}
|
||||
|
||||
Module* GetModule(s32 index) const {
|
||||
|
@ -59,7 +86,7 @@ public:
|
|||
void* TlsGetAddr(u64 module_index, u64 offset);
|
||||
void InitTlsForThread(bool is_primary = false);
|
||||
|
||||
s32 LoadModule(const std::filesystem::path& elf_name);
|
||||
s32 LoadModule(const std::filesystem::path& elf_name, bool is_dynamic = false);
|
||||
Module* FindByAddress(VAddr address);
|
||||
|
||||
void Relocate(Module* module);
|
||||
|
@ -71,10 +98,12 @@ public:
|
|||
private:
|
||||
const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l);
|
||||
|
||||
MemoryManager* memory;
|
||||
std::mutex mutex;
|
||||
u32 dtv_generation_counter{1};
|
||||
size_t static_tls_size{};
|
||||
u32 max_tls_index{};
|
||||
u32 num_static_modules{};
|
||||
HeapApiFunc heap_api_func{};
|
||||
std::vector<std::unique_ptr<Module>> m_modules;
|
||||
Loader::SymbolsResolver m_hle_symbols{};
|
||||
|
|
|
@ -84,7 +84,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
|
|||
MemoryMapFlags flags, VMAType type, std::string_view name,
|
||||
bool is_exec, PAddr phys_addr, u64 alignment) {
|
||||
std::scoped_lock lk{mutex};
|
||||
if (type == VMAType::Flexible && total_flexible_usage + size > 448_MB) {
|
||||
if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) {
|
||||
return SCE_KERNEL_ERROR_ENOMEM;
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
|
|||
MapVulkanMemory(mapped_addr, size);
|
||||
}
|
||||
if (type == VMAType::Flexible) {
|
||||
total_flexible_usage += size;
|
||||
flexible_usage += size;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -184,7 +184,7 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
|
|||
UnmapVulkanMemory(virtual_addr, size);
|
||||
}
|
||||
if (type == VMAType::Flexible) {
|
||||
total_flexible_usage -= size;
|
||||
flexible_usage -= size;
|
||||
}
|
||||
|
||||
// Mark region as free and attempt to coalesce it with neighbours.
|
||||
|
|
|
@ -124,6 +124,14 @@ public:
|
|||
instance = instance_;
|
||||
}
|
||||
|
||||
/// Replaces the flexible-memory budget, i.e. the limit MapMemory compares
/// `flexible_usage + size` against before accepting a flexible mapping.
///
/// @param size New total budget; stored as given, with no validation against current usage.
void SetTotalFlexibleSize(u64 size) {
    this->total_flexible_size = size;
}
|
||||
|
||||
/// Returns how much of the flexible-memory budget is still unused.
///
/// SetTotalFlexibleSize() can lower the budget after flexible mappings already exist, so
/// usage may exceed the total. Both counters are unsigned, and a plain subtraction would
/// then wrap around to a huge bogus value; report 0 instead.
///
/// @return total_flexible_size - flexible_usage, clamped at 0.
u64 GetAvailableFlexibleSize() const {
    if (flexible_usage >= total_flexible_size) {
        return 0;
    }
    return total_flexible_size - flexible_usage;
}
|
||||
|
||||
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
|
||||
int memory_type);
|
||||
|
||||
|
@ -195,7 +203,8 @@ private:
|
|||
DMemMap dmem_map;
|
||||
VMAMap vma_map;
|
||||
std::recursive_mutex mutex;
|
||||
size_t total_flexible_usage{};
|
||||
size_t total_flexible_size = 448_MB;
|
||||
size_t flexible_usage{};
|
||||
|
||||
struct MappedMemory {
|
||||
vk::UniqueBuffer buffer;
|
||||
|
|
|
@ -55,8 +55,8 @@ static std::string EncodeId(u64 nVal) {
|
|||
return enc;
|
||||
}
|
||||
|
||||
Module::Module(const std::filesystem::path& file_, u32& max_tls_index)
|
||||
: file{file_}, name{file.stem().string()} {
|
||||
Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index)
|
||||
: memory{memory_}, file{file_}, name{file.stem().string()} {
|
||||
elf.Open(file);
|
||||
if (elf.IsElfFile()) {
|
||||
LoadModuleToMemory(max_tls_index);
|
||||
|
@ -84,7 +84,6 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
|
|||
aligned_base_size = Common::AlignUp(base_size, BlockAlign);
|
||||
|
||||
// Map module segments (and possible TLS trampolines)
|
||||
auto* memory = Core::Memory::Instance();
|
||||
void** out_addr = reinterpret_cast<void**>(&base_virtual_addr);
|
||||
memory->MapMemory(out_addr, LoadAddress, aligned_base_size + TrampolineSize,
|
||||
MemoryProt::CpuReadWrite, MemoryMapFlags::Fixed, VMAType::Code, name, true);
|
||||
|
|
|
@ -137,10 +137,12 @@ struct DynamicModuleInfo {
|
|||
};
|
||||
|
||||
using ModuleFunc = int (*)(size_t, const void*);
|
||||
class MemoryManager;
|
||||
|
||||
class Module {
|
||||
public:
|
||||
explicit Module(const std::filesystem::path& file, u32& max_tls_index);
|
||||
explicit Module(Core::MemoryManager* memory, const std::filesystem::path& file,
|
||||
u32& max_tls_index);
|
||||
~Module();
|
||||
|
||||
VAddr GetBaseAddress() const noexcept {
|
||||
|
@ -220,6 +222,7 @@ public:
|
|||
const LibraryInfo* FindLibrary(std::string_view id);
|
||||
|
||||
public:
|
||||
Core::MemoryManager* memory;
|
||||
std::filesystem::path file;
|
||||
std::string name;
|
||||
Loader::Elf elf;
|
||||
|
|
|
@ -54,7 +54,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id of
|
|||
Id ms) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
|
||||
if (Sirit::ValidId(lod)) {
|
||||
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
|
||||
} else {
|
||||
return ctx.OpImageFetch(ctx.F32[4], image, coords);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
|
||||
|
|
|
@ -216,6 +216,14 @@ void Translator::S_AND_B32(const GcnInst& inst) {
|
|||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_OR_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result{ir.BitwiseOr(src0, src1)};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_LSHR_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
|
@ -285,4 +293,36 @@ void Translator::S_BFM_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1));
|
||||
}
|
||||
|
||||
void Translator::S_NOT_B64(const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 result = ir.LogicalNot(src0);
|
||||
ir.SetScc(result);
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates S_BREV_B32: stores the bit-reversed first source operand into dst[0].
void Translator::S_BREV_B32(const GcnInst& inst) {
    const auto reversed = ir.BitReverse(GetSrc(inst.src[0]));
    SetDst(inst.dst[0], reversed);
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -324,7 +324,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
translator.IMAGE_STORE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_LOAD_MIP:
|
||||
translator.IMAGE_LOAD_MIP(inst);
|
||||
translator.IMAGE_LOAD(true, inst);
|
||||
break;
|
||||
case Opcode::IMAGE_LOAD:
|
||||
translator.IMAGE_LOAD(false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
|
||||
|
@ -335,6 +338,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CMP_LE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LE, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LG, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
|
||||
break;
|
||||
|
@ -386,6 +392,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CMP_NLT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LT_U32:
|
||||
translator.S_CMP(ConditionOp::LT, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_U32:
|
||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
|
@ -585,6 +594,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_AND_B64:
|
||||
translator.S_AND_B64(false, inst);
|
||||
break;
|
||||
case Opcode::S_NOT_B64:
|
||||
translator.S_NOT_B64(inst);
|
||||
break;
|
||||
case Opcode::S_NAND_B64:
|
||||
translator.S_AND_B64(true, inst);
|
||||
break;
|
||||
|
@ -627,6 +639,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_AND_B32:
|
||||
translator.S_AND_B32(inst);
|
||||
break;
|
||||
case Opcode::S_OR_B32:
|
||||
translator.S_OR_B32(inst);
|
||||
break;
|
||||
case Opcode::S_LSHR_B32:
|
||||
translator.S_LSHR_B32(inst);
|
||||
break;
|
||||
|
@ -657,9 +672,27 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_BFM_B32:
|
||||
translator.S_BFM_B32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN_U32:
|
||||
translator.V_MIN_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NE_U64:
|
||||
translator.V_CMP_NE_U64(inst);
|
||||
break;
|
||||
case Opcode::V_TRUNC_F32:
|
||||
translator.V_TRUNC_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CEIL_F32:
|
||||
translator.V_CEIL_F32(inst);
|
||||
break;
|
||||
case Opcode::V_BFI_B32:
|
||||
translator.V_BFI_B32(inst);
|
||||
break;
|
||||
case Opcode::S_BREV_B32:
|
||||
translator.S_BREV_B32(inst);
|
||||
break;
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
break;
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
|
|
|
@ -45,12 +45,15 @@ public:
|
|||
void S_AND_B64(bool negate, const GcnInst& inst);
|
||||
void S_ADD_I32(const GcnInst& inst);
|
||||
void S_AND_B32(const GcnInst& inst);
|
||||
void S_OR_B32(const GcnInst& inst);
|
||||
void S_LSHR_B32(const GcnInst& inst);
|
||||
void S_CSELECT_B32(const GcnInst& inst);
|
||||
void S_CSELECT_B64(const GcnInst& inst);
|
||||
void S_BFE_U32(const GcnInst& inst);
|
||||
void S_LSHL_B32(const GcnInst& inst);
|
||||
void S_BFM_B32(const GcnInst& inst);
|
||||
void S_NOT_B64(const GcnInst& inst);
|
||||
void S_BREV_B32(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
@ -115,6 +118,10 @@ public:
|
|||
void V_MIN_I32(const GcnInst& inst);
|
||||
void V_MUL_LO_U32(const GcnInst& inst);
|
||||
void V_TRUNC_F32(const GcnInst& inst);
|
||||
void V_CEIL_F32(const GcnInst& inst);
|
||||
void V_MIN_U32(const GcnInst& inst);
|
||||
void V_CMP_NE_U64(const GcnInst& inst);
|
||||
void V_BFI_B32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
|
@ -132,7 +139,7 @@ public:
|
|||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||
void IMAGE_STORE(const GcnInst& inst);
|
||||
void IMAGE_LOAD_MIP(const GcnInst& inst);
|
||||
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
|
||||
|
||||
// Export
|
||||
void EXP(const GcnInst& inst);
|
||||
|
|
|
@ -430,4 +430,52 @@ void Translator::V_TRUNC_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPTrunc(src0));
|
||||
}
|
||||
|
||||
void Translator::V_CEIL_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
SetDst(inst.dst[0], ir.FPCeil(src0));
|
||||
}
|
||||
|
||||
void Translator::V_MIN_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IMin(src0, src1, false));
|
||||
}
|
||||
|
||||
void Translator::V_CMP_NE_U64(const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||
case OperandField::ConstZero:
|
||||
return ir.Imm1(false);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
ASSERT(inst.src[1].field == OperandField::ConstZero); // src0 != 0
|
||||
switch (inst.dst[1].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(src0);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), src0);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::V_BFI_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||
SetDst(inst.dst[0],
|
||||
ir.BitwiseOr(ir.BitwiseAnd(src0, src1), ir.BitwiseAnd(ir.BitwiseNot(src0), src2)));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -31,7 +31,9 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
|||
|
||||
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
ASSERT(!mimg.da);
|
||||
if (mimg.da) {
|
||||
LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
|
||||
}
|
||||
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||
|
@ -107,7 +109,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) {
|
||||
void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||
|
@ -119,7 +121,7 @@ void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) {
|
|||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
info.explicit_lod.Assign(1);
|
||||
info.explicit_lod.Assign(has_mip);
|
||||
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
|
||||
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
|
|
|
@ -251,7 +251,9 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
|||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
|
||||
ASSERT(producer->GetOpcode() ==
|
||||
IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
producer->GetOpcode() == IR::Opcode::GetUserData);
|
||||
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
|
||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
||||
|
|
|
@ -827,7 +827,8 @@ struct Liverpool {
|
|||
PolygonControl polygon_control;
|
||||
ViewportControl viewport_control;
|
||||
VsOutputControl vs_output_control;
|
||||
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 1);
|
||||
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 2);
|
||||
u32 index_size;
|
||||
u32 max_index_size;
|
||||
IndexBufferType index_buffer_type;
|
||||
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
||||
|
@ -993,6 +994,7 @@ static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
|
|||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||
|
|
|
@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
|||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||
pipeline_cache{instance, scheduler, liverpool},
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 32_MB} {
|
||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 128_MB} {
|
||||
if (!Config::nullGpu()) {
|
||||
liverpool->BindRasterizer(this);
|
||||
}
|
||||
|
|
|
@ -191,6 +191,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
case vk::Format::eBc3SrgbBlock:
|
||||
[[fallthrough]];
|
||||
case vk::Format::eBc3UnormBlock:
|
||||
case vk::Format::eBc7SrgbBlock:
|
||||
return vk::Format::eR32G32B32A32Uint;
|
||||
default:
|
||||
break;
|
||||
|
@ -225,7 +226,8 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eT
|
|||
vk::BufferUsageFlagBits::eStorageBuffer;
|
||||
|
||||
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
||||
: instance{instance}, scheduler{scheduler}, staging{instance, scheduler, StagingFlags, 64_MB} {
|
||||
: instance{instance}, scheduler{scheduler},
|
||||
staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} {
|
||||
|
||||
static const std::array detiler_shaders{
|
||||
HostShaders::DETILE_M8X1_COMP,
|
||||
|
|
Loading…
Reference in New Issue