Merge pull request #217 from shadps4-emu/stabilization7
kernel/shader_recompiler: Fixes and cleanups to improve stability
This commit is contained in:
commit
f522948df8
|
@ -181,7 +181,7 @@ public:
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
size_t WriteRaw(void* data, size_t size) const {
|
||||
size_t WriteRaw(const void* data, size_t size) const {
|
||||
return std::fwrite(data, sizeof(T), size, file);
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/singleton.h"
|
||||
#include "core/file_sys/fs.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
|
@ -12,26 +13,16 @@
|
|||
namespace Libraries::Kernel {
|
||||
|
||||
std::vector<Core::FileSys::DirEntry> GetDirectoryEntries(const std::string& path) {
|
||||
std::string curpath = path;
|
||||
if (!curpath.ends_with("/")) {
|
||||
curpath = std::string(curpath + "/");
|
||||
}
|
||||
std::vector<Core::FileSys::DirEntry> files;
|
||||
|
||||
for (const auto& entry : std::filesystem::directory_iterator(curpath)) {
|
||||
Core::FileSys::DirEntry e = {};
|
||||
if (std::filesystem::is_directory(entry.path().string())) {
|
||||
e.name = entry.path().filename().string();
|
||||
e.isFile = false;
|
||||
} else {
|
||||
e.name = entry.path().filename().string();
|
||||
e.isFile = true;
|
||||
}
|
||||
files.push_back(e);
|
||||
for (const auto& entry : std::filesystem::directory_iterator(path)) {
|
||||
auto& dir_entry = files.emplace_back();
|
||||
dir_entry.name = entry.path().filename().string();
|
||||
dir_entry.isFile = !std::filesystem::is_directory(entry.path().string());
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
|
||||
LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode);
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
|
@ -135,10 +126,7 @@ int PS4_SYSV_ABI posix_close(int d) {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) {
|
||||
if (buf == nullptr) {
|
||||
return SCE_KERNEL_ERROR_EFAULT;
|
||||
}
|
||||
size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) {
|
||||
if (d <= 2) { // stdin,stdout,stderr
|
||||
char* str = strdup((const char*)buf);
|
||||
if (str[nbytes - 1] == '\n')
|
||||
|
@ -152,20 +140,19 @@ size_t PS4_SYSV_ABI sceKernelWrite(int d, void* buf, size_t nbytes) {
|
|||
if (file == nullptr) {
|
||||
return SCE_KERNEL_ERROR_EBADF;
|
||||
}
|
||||
file->m_mutex.lock();
|
||||
u32 bytes_write = file->f.WriteRaw<u8>(buf, static_cast<u32>(nbytes));
|
||||
file->m_mutex.unlock();
|
||||
return bytes_write;
|
||||
|
||||
std::scoped_lock lk{file->m_mutex};
|
||||
return file->f.WriteRaw<u8>(buf, nbytes);
|
||||
}
|
||||
|
||||
size_t PS4_SYSV_ABI _readv(int d, const SceKernelIovec* iov, int iovcnt) {
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* file = h->GetFile(d);
|
||||
size_t total_read = 0;
|
||||
file->m_mutex.lock();
|
||||
std::scoped_lock lk{file->m_mutex};
|
||||
for (int i = 0; i < iovcnt; i++) {
|
||||
total_read += file->f.ReadRaw<u8>(iov[i].iov_base, iov[i].iov_len);
|
||||
}
|
||||
file->m_mutex.unlock();
|
||||
return total_read;
|
||||
}
|
||||
|
||||
|
@ -173,24 +160,18 @@ s64 PS4_SYSV_ABI sceKernelLseek(int d, s64 offset, int whence) {
|
|||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* file = h->GetFile(d);
|
||||
|
||||
file->m_mutex.lock();
|
||||
Common::FS::SeekOrigin origin;
|
||||
Common::FS::SeekOrigin origin{};
|
||||
if (whence == 0) {
|
||||
origin = Common::FS::SeekOrigin::SetOrigin;
|
||||
}
|
||||
|
||||
if (whence == 1) {
|
||||
} else if (whence == 1) {
|
||||
origin = Common::FS::SeekOrigin::CurrentPosition;
|
||||
}
|
||||
if (whence == 2) {
|
||||
} else if (whence == 2) {
|
||||
origin = Common::FS::SeekOrigin::End;
|
||||
}
|
||||
|
||||
std::scoped_lock lk{file->m_mutex};
|
||||
file->f.Seek(offset, origin);
|
||||
auto pos = static_cast<int64_t>(file->f.Tell());
|
||||
|
||||
file->m_mutex.unlock();
|
||||
return pos;
|
||||
return file->f.Tell();
|
||||
}
|
||||
|
||||
s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) {
|
||||
|
@ -198,19 +179,14 @@ s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) {
|
|||
}
|
||||
|
||||
s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) {
|
||||
if (buf == nullptr) {
|
||||
return SCE_KERNEL_ERROR_EFAULT;
|
||||
}
|
||||
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* file = h->GetFile(d);
|
||||
if (file == nullptr) {
|
||||
return SCE_KERNEL_ERROR_EBADF;
|
||||
}
|
||||
file->m_mutex.lock();
|
||||
u32 bytes_read = file->f.ReadRaw<u8>(buf, static_cast<u32>(nbytes));
|
||||
file->m_mutex.unlock();
|
||||
return bytes_read;
|
||||
|
||||
std::scoped_lock lk{file->m_mutex};
|
||||
return file->f.ReadRaw<u8>(buf, nbytes);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI posix_read(int d, void* buf, size_t nbytes) {
|
||||
|
@ -245,10 +221,10 @@ int PS4_SYSV_ABI posix_mkdir(const char* path, u16 mode) {
|
|||
int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) {
|
||||
LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
|
||||
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||
std::string path_name = mnt->GetHostFile(path);
|
||||
memset(sb, 0, sizeof(OrbisKernelStat));
|
||||
bool is_dir = std::filesystem::is_directory(path_name);
|
||||
bool is_file = std::filesystem::is_regular_file(path_name);
|
||||
const auto& path_name = mnt->GetHostFile(path);
|
||||
std::memset(sb, 0, sizeof(OrbisKernelStat));
|
||||
const bool is_dir = std::filesystem::is_directory(path_name);
|
||||
const bool is_file = std::filesystem::is_regular_file(path_name);
|
||||
if (!is_dir && !is_file) {
|
||||
return ORBIS_KERNEL_ERROR_ENOENT;
|
||||
}
|
||||
|
@ -290,35 +266,30 @@ s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) {
|
|||
if (d < 3) {
|
||||
return ORBIS_KERNEL_ERROR_EPERM;
|
||||
}
|
||||
|
||||
if (buf == nullptr) {
|
||||
return ORBIS_KERNEL_ERROR_EFAULT;
|
||||
}
|
||||
|
||||
if (offset < 0) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* file = h->GetFile(d);
|
||||
|
||||
if (file == nullptr) {
|
||||
return ORBIS_KERNEL_ERROR_EBADF;
|
||||
}
|
||||
file->m_mutex.lock();
|
||||
if (file->f.Tell() != offset) {
|
||||
|
||||
std::scoped_lock lk{file->m_mutex};
|
||||
const s64 pos = file->f.Tell();
|
||||
SCOPE_EXIT {
|
||||
file->f.Seek(pos);
|
||||
};
|
||||
file->f.Seek(offset);
|
||||
}
|
||||
u32 bytes_read = file->f.ReadRaw<u8>(buf, static_cast<u32>(nbytes));
|
||||
file->m_mutex.unlock();
|
||||
return bytes_read;
|
||||
return file->f.ReadRaw<u8>(buf, nbytes);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelFStat(int fd, OrbisKernelStat* sb) {
|
||||
LOG_INFO(Kernel_Fs, "(PARTIAL) fd = {}", fd);
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* file = h->GetFile(fd);
|
||||
memset(sb, 0, sizeof(OrbisKernelStat));
|
||||
std::memset(sb, 0, sizeof(OrbisKernelStat));
|
||||
|
||||
if (file->is_directory) {
|
||||
sb->st_mode = 0000777u | 0040000u;
|
||||
|
@ -347,13 +318,14 @@ s32 PS4_SYSV_ABI sceKernelFsync(int fd) {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int GetDents(int fd, char* buf, int nbytes, s64* basep) {
|
||||
static int GetDents(int fd, char* buf, int nbytes, s64* basep) {
|
||||
// TODO error codes
|
||||
ASSERT(buf != nullptr);
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* file = h->GetFile(fd);
|
||||
|
||||
if (file->dirents_index == file->dirents.size()) {
|
||||
return 0;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
const auto& entry = file->dirents.at(file->dirents_index++);
|
||||
|
@ -388,31 +360,23 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) {
|
|||
if (d < 3) {
|
||||
return ORBIS_KERNEL_ERROR_EPERM;
|
||||
}
|
||||
|
||||
if (buf == nullptr) {
|
||||
return ORBIS_KERNEL_ERROR_EFAULT;
|
||||
}
|
||||
|
||||
if (offset < 0) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
|
||||
auto* file = h->GetFile(d);
|
||||
|
||||
if (file == nullptr) {
|
||||
return ORBIS_KERNEL_ERROR_EBADF;
|
||||
}
|
||||
|
||||
file->m_mutex.lock();
|
||||
|
||||
auto pos = file->f.Tell();
|
||||
file->f.Seek(offset);
|
||||
u32 bytes_write = file->f.WriteRaw<u8>(buf, static_cast<u32>(nbytes));
|
||||
std::scoped_lock lk{file->m_mutex};
|
||||
const s64 pos = file->f.Tell();
|
||||
SCOPE_EXIT {
|
||||
file->f.Seek(pos);
|
||||
file->m_mutex.unlock();
|
||||
|
||||
return bytes_write;
|
||||
};
|
||||
file->f.Seek(offset);
|
||||
return file->f.WriteRaw<u8>(buf, nbytes);
|
||||
}
|
||||
|
||||
void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
||||
|
|
|
@ -3,12 +3,13 @@
|
|||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include <boost/intrusive/list.hpp>
|
||||
#include <pthread.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/kernel/thread_management.h"
|
||||
#include "core/libraries/libs.h"
|
||||
|
||||
namespace Libraries::Kernel {
|
||||
|
@ -18,26 +19,30 @@ using ListBaseHook =
|
|||
|
||||
class Semaphore {
|
||||
public:
|
||||
Semaphore(s32 init_count, s32 max_count, const char* name, bool is_fifo)
|
||||
: name{name}, token_count{init_count}, max_count{max_count}, is_fifo{is_fifo} {}
|
||||
Semaphore(s32 init_count, s32 max_count, std::string_view name, bool is_fifo)
|
||||
: name{name}, token_count{init_count}, max_count{max_count}, init_count{init_count},
|
||||
is_fifo{is_fifo} {}
|
||||
~Semaphore() {
|
||||
ASSERT(wait_list.empty());
|
||||
}
|
||||
|
||||
bool Wait(bool can_block, s32 need_count, u64* timeout) {
|
||||
if (HasAvailableTokens(need_count)) {
|
||||
return true;
|
||||
int Wait(bool can_block, s32 need_count, u32* timeout) {
|
||||
std::unique_lock lk{mutex};
|
||||
if (token_count >= need_count) {
|
||||
token_count -= need_count;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
if (!can_block) {
|
||||
return false;
|
||||
return ORBIS_KERNEL_ERROR_EBUSY;
|
||||
}
|
||||
|
||||
// Create waiting thread object and add it into the list of waiters.
|
||||
WaitingThread waiter{need_count, is_fifo};
|
||||
AddWaiter(waiter);
|
||||
SCOPE_EXIT {
|
||||
PopWaiter(waiter);
|
||||
};
|
||||
|
||||
// Perform the wait.
|
||||
return waiter.Wait(timeout);
|
||||
std::exchange(lk, std::unique_lock{waiter.mutex});
|
||||
return waiter.Wait(lk, timeout);
|
||||
}
|
||||
|
||||
bool Signal(s32 signal_count) {
|
||||
|
@ -48,25 +53,47 @@ public:
|
|||
token_count += signal_count;
|
||||
|
||||
// Wake up threads in order of priority.
|
||||
for (auto& waiter : wait_list) {
|
||||
for (auto it = wait_list.begin(); it != wait_list.end();) {
|
||||
auto& waiter = *it;
|
||||
if (waiter.need_count > token_count) {
|
||||
it++;
|
||||
continue;
|
||||
}
|
||||
std::scoped_lock lk2{waiter.mutex};
|
||||
token_count -= waiter.need_count;
|
||||
waiter.cv.notify_one();
|
||||
it = wait_list.erase(it);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
int Cancel(s32 set_count, s32* num_waiters) {
|
||||
std::scoped_lock lk{mutex};
|
||||
if (num_waiters) {
|
||||
*num_waiters = wait_list.size();
|
||||
}
|
||||
for (auto& waiter : wait_list) {
|
||||
waiter.was_cancled = true;
|
||||
waiter.cv.notify_one();
|
||||
}
|
||||
wait_list.clear();
|
||||
token_count = set_count < 0 ? init_count : set_count;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
public:
|
||||
struct WaitingThread : public ListBaseHook {
|
||||
std::mutex mutex;
|
||||
std::string name;
|
||||
std::condition_variable cv;
|
||||
u32 priority;
|
||||
s32 need_count;
|
||||
bool was_deleted{};
|
||||
bool was_cancled{};
|
||||
|
||||
explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} {
|
||||
name = scePthreadSelf()->name;
|
||||
if (is_fifo) {
|
||||
return;
|
||||
}
|
||||
|
@ -77,12 +104,24 @@ private:
|
|||
priority = param.sched_priority;
|
||||
}
|
||||
|
||||
bool Wait(u64* timeout) {
|
||||
std::unique_lock lk{mutex};
|
||||
int GetResult(bool timed_out) {
|
||||
if (timed_out) {
|
||||
return SCE_KERNEL_ERROR_ETIMEDOUT;
|
||||
}
|
||||
if (was_deleted) {
|
||||
return SCE_KERNEL_ERROR_EACCES;
|
||||
}
|
||||
if (was_cancled) {
|
||||
return SCE_KERNEL_ERROR_ECANCELED;
|
||||
}
|
||||
return SCE_OK;
|
||||
}
|
||||
|
||||
int Wait(std::unique_lock<std::mutex>& lk, u32* timeout) {
|
||||
if (!timeout) {
|
||||
// Wait indefinitely until we are woken up.
|
||||
cv.wait(lk);
|
||||
return true;
|
||||
return GetResult(false);
|
||||
}
|
||||
// Wait until timeout runs out, recording how much remaining time there was.
|
||||
const auto start = std::chrono::high_resolution_clock::now();
|
||||
|
@ -91,16 +130,11 @@ private:
|
|||
const auto time =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
||||
*timeout -= time;
|
||||
return status != std::cv_status::timeout;
|
||||
}
|
||||
|
||||
bool operator<(const WaitingThread& other) const {
|
||||
return priority < other.priority;
|
||||
return GetResult(status == std::cv_status::timeout);
|
||||
}
|
||||
};
|
||||
|
||||
void AddWaiter(WaitingThread& waiter) {
|
||||
std::scoped_lock lk{mutex};
|
||||
// Insert at the end of the list for FIFO order.
|
||||
if (is_fifo) {
|
||||
wait_list.push_back(waiter);
|
||||
|
@ -114,20 +148,6 @@ private:
|
|||
wait_list.insert(it, waiter);
|
||||
}
|
||||
|
||||
void PopWaiter(WaitingThread& waiter) {
|
||||
std::scoped_lock lk{mutex};
|
||||
wait_list.erase(WaitingThreads::s_iterator_to(waiter));
|
||||
}
|
||||
|
||||
bool HasAvailableTokens(s32 need_count) {
|
||||
std::scoped_lock lk{mutex};
|
||||
if (token_count >= need_count) {
|
||||
token_count -= need_count;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
using WaitingThreads =
|
||||
boost::intrusive::list<WaitingThread, boost::intrusive::base_hook<ListBaseHook>,
|
||||
boost::intrusive::constant_time_size<false>>;
|
||||
|
@ -136,6 +156,7 @@ private:
|
|||
std::atomic<s32> token_count;
|
||||
std::mutex mutex;
|
||||
s32 max_count;
|
||||
s32 init_count;
|
||||
bool is_fifo;
|
||||
};
|
||||
|
||||
|
@ -151,9 +172,8 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u64* pTimeout) {
|
||||
ASSERT(sem->Wait(true, needCount, pTimeout));
|
||||
return ORBIS_OK;
|
||||
s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) {
|
||||
return sem->Wait(true, needCount, pTimeout);
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
|
||||
|
@ -164,9 +184,18 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
|
|||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) {
|
||||
if (!sem->Wait(false, needCount, nullptr)) {
|
||||
return ORBIS_KERNEL_ERROR_EBUSY;
|
||||
return sem->Wait(false, needCount, nullptr);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) {
|
||||
return sem->Cancel(setCount, pNumWaitThreads);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelDeleteSema(OrbisKernelSema sem) {
|
||||
if (!sem) {
|
||||
return SCE_KERNEL_ERROR_ESRCH;
|
||||
}
|
||||
delete sem;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
@ -175,6 +204,8 @@ void SemaphoreSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("Zxa0VhQVTsk", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitSema);
|
||||
LIB_FUNCTION("4czppHBiriw", "libkernel", 1, "libkernel", 1, 1, sceKernelSignalSema);
|
||||
LIB_FUNCTION("12wOHk8ywb0", "libkernel", 1, "libkernel", 1, 1, sceKernelPollSema);
|
||||
LIB_FUNCTION("4DM06U2BNEY", "libkernel", 1, "libkernel", 1, 1, sceKernelCancelSema);
|
||||
LIB_FUNCTION("R1Jvn8bSCW8", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteSema);
|
||||
}
|
||||
|
||||
} // namespace Libraries::Kernel
|
||||
|
|
|
@ -68,9 +68,11 @@ void Linker::Execute() {
|
|||
}
|
||||
|
||||
// Configure used flexible memory size.
|
||||
if (u64* flexible_size = GetProcParam()->mem_param->flexible_memory_size) {
|
||||
if (auto* mem_param = GetProcParam()->mem_param) {
|
||||
if (u64* flexible_size = mem_param->flexible_memory_size) {
|
||||
memory->SetTotalFlexibleSize(*flexible_size);
|
||||
}
|
||||
}
|
||||
|
||||
// Init primary thread.
|
||||
Common::SetCurrentThreadName("GAME_MainThread");
|
||||
|
|
|
@ -206,9 +206,15 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
|
|||
const auto& vma = it->second;
|
||||
ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped");
|
||||
|
||||
if (start != nullptr) {
|
||||
*start = reinterpret_cast<void*>(vma.base);
|
||||
}
|
||||
if (end != nullptr) {
|
||||
*end = reinterpret_cast<void*>(vma.base + vma.size);
|
||||
}
|
||||
if (prot != nullptr) {
|
||||
*prot = static_cast<u32>(vma.prot);
|
||||
}
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
#include "core/memory.h"
|
||||
#include "core/module.h"
|
||||
#include "core/tls.h"
|
||||
#include "core/virtual_memory.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
|
|
|
@ -46,40 +46,6 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
|||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
void Translator::S_ANDN2_B64(const GcnInst& inst) {
|
||||
// TODO: What if this is used for something other than EXEC masking?
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))};
|
||||
ir.SetScc(result);
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
break;
|
||||
case OperandField::ExecLo:
|
||||
ir.SetExec(result);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
|
@ -138,7 +104,7 @@ void Translator::S_MOV_B64(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
||||
void Translator::S_OR_B64(NegateMode negate, const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
|
@ -151,9 +117,12 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
|||
};
|
||||
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
IR::U1 src1{get_src(inst.src[1])};
|
||||
if (negate == NegateMode::Src1) {
|
||||
src1 = ir.LogicalNot(src1);
|
||||
}
|
||||
IR::U1 result = ir.LogicalOr(src0, src1);
|
||||
if (negate) {
|
||||
if (negate == NegateMode::Result) {
|
||||
result = ir.LogicalNot(result);
|
||||
}
|
||||
ir.SetScc(result);
|
||||
|
@ -169,7 +138,7 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::S_AND_B64(bool negate, const GcnInst& inst) {
|
||||
void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
|
@ -183,9 +152,12 @@ void Translator::S_AND_B64(bool negate, const GcnInst& inst) {
|
|||
}
|
||||
};
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
IR::U1 src1{get_src(inst.src[1])};
|
||||
if (negate == NegateMode::Src1) {
|
||||
src1 = ir.LogicalNot(src1);
|
||||
}
|
||||
IR::U1 result = ir.LogicalAnd(src0, src1);
|
||||
if (negate) {
|
||||
if (negate == NegateMode::Result) {
|
||||
result = ir.LogicalNot(result);
|
||||
}
|
||||
ir.SetScc(result);
|
||||
|
@ -196,6 +168,9 @@ void Translator::S_AND_B64(bool negate, const GcnInst& inst) {
|
|||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||
break;
|
||||
case OperandField::ExecLo:
|
||||
ir.SetExec(result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -325,4 +300,20 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
||||
}
|
||||
|
||||
void Translator::S_ADD_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IAdd(src0, src1));
|
||||
// TODO: Carry out
|
||||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
void Translator::S_SUB_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.ISub(src0, src1));
|
||||
// TODO: Carry out
|
||||
ir.SetScc(ir.Imm1(false));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -105,7 +105,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
|||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
if (force_flt) {
|
||||
value = ir.Imm32(1.f);
|
||||
} else {
|
||||
value = ir.Imm32(std::bit_cast<u32>(1.f));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_0_5:
|
||||
value = ir.Imm32(0.5f);
|
||||
|
@ -274,6 +278,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_LOAD_DWORDX8:
|
||||
translator.S_LOAD_DWORD(8, inst);
|
||||
break;
|
||||
case Opcode::S_LOAD_DWORDX16:
|
||||
translator.S_LOAD_DWORD(16, inst);
|
||||
break;
|
||||
case Opcode::S_BUFFER_LOAD_DWORD:
|
||||
translator.S_BUFFER_LOAD_DWORD(1, inst);
|
||||
break;
|
||||
|
@ -324,6 +331,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::IMAGE_SAMPLE_C_LZ:
|
||||
case Opcode::IMAGE_SAMPLE_LZ:
|
||||
case Opcode::IMAGE_SAMPLE:
|
||||
case Opcode::IMAGE_SAMPLE_L:
|
||||
translator.IMAGE_SAMPLE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_STORE:
|
||||
|
@ -437,9 +445,18 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::BUFFER_LOAD_FORMAT_X:
|
||||
translator.BUFFER_LOAD_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
|
||||
translator.BUFFER_LOAD_FORMAT(3, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
|
||||
translator.BUFFER_LOAD_FORMAT(4, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
translator.BUFFER_STORE_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||
translator.BUFFER_STORE_FORMAT(4, false, inst);
|
||||
break;
|
||||
case Opcode::V_MAX_F32:
|
||||
translator.V_MAX_F32(inst);
|
||||
break;
|
||||
|
@ -453,7 +470,10 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
translator.V_RSQ_F32(inst);
|
||||
break;
|
||||
case Opcode::S_ANDN2_B64:
|
||||
translator.S_ANDN2_B64(inst);
|
||||
translator.S_AND_B64(NegateMode::Src1, inst);
|
||||
break;
|
||||
case Opcode::S_ORN2_B64:
|
||||
translator.S_OR_B64(NegateMode::Src1, inst);
|
||||
break;
|
||||
case Opcode::V_SIN_F32:
|
||||
translator.V_SIN_F32(inst);
|
||||
|
@ -592,19 +612,19 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
translator.V_CMP_U32(ConditionOp::TRU, false, true, inst);
|
||||
break;
|
||||
case Opcode::S_OR_B64:
|
||||
translator.S_OR_B64(false, inst);
|
||||
translator.S_OR_B64(NegateMode::None, inst);
|
||||
break;
|
||||
case Opcode::S_NOR_B64:
|
||||
translator.S_OR_B64(true, inst);
|
||||
translator.S_OR_B64(NegateMode::Result, inst);
|
||||
break;
|
||||
case Opcode::S_AND_B64:
|
||||
translator.S_AND_B64(false, inst);
|
||||
translator.S_AND_B64(NegateMode::None, inst);
|
||||
break;
|
||||
case Opcode::S_NOT_B64:
|
||||
translator.S_NOT_B64(inst);
|
||||
break;
|
||||
case Opcode::S_NAND_B64:
|
||||
translator.S_AND_B64(true, inst);
|
||||
translator.S_AND_B64(NegateMode::Result, inst);
|
||||
break;
|
||||
case Opcode::V_LSHRREV_B32:
|
||||
translator.V_LSHRREV_B32(inst);
|
||||
|
@ -696,6 +716,29 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_BREV_B32:
|
||||
translator.S_BREV_B32(inst);
|
||||
break;
|
||||
case Opcode::S_ADD_U32:
|
||||
translator.S_ADD_U32(inst);
|
||||
break;
|
||||
case Opcode::S_SUB_U32:
|
||||
translator.S_SUB_U32(inst);
|
||||
break;
|
||||
// TODO: Separate implementation for legacy variants.
|
||||
case Opcode::V_MUL_LEGACY_F32:
|
||||
translator.V_MUL_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MAC_LEGACY_F32:
|
||||
translator.V_MAC_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MAD_LEGACY_F32:
|
||||
translator.V_MAD_F32(inst);
|
||||
break;
|
||||
case Opcode::V_RSQ_LEGACY_F32:
|
||||
case Opcode::V_RSQ_CLAMP_F32:
|
||||
translator.V_RSQ_F32(inst);
|
||||
break;
|
||||
case Opcode::V_RCP_IFLAG_F32:
|
||||
translator.V_RCP_F32(inst);
|
||||
break;
|
||||
case Opcode::S_TTRACEDATA:
|
||||
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
|
||||
break;
|
||||
|
|
|
@ -26,6 +26,12 @@ enum class ConditionOp : u32 {
|
|||
TRU,
|
||||
};
|
||||
|
||||
enum class NegateMode : u32 {
|
||||
None,
|
||||
Src1,
|
||||
Result,
|
||||
};
|
||||
|
||||
class Translator {
|
||||
public:
|
||||
explicit Translator(IR::Block* block_, Info& info);
|
||||
|
@ -38,11 +44,10 @@ public:
|
|||
void S_MOV(const GcnInst& inst);
|
||||
void S_MUL_I32(const GcnInst& inst);
|
||||
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
void S_ANDN2_B64(const GcnInst& inst);
|
||||
void S_AND_SAVEEXEC_B64(const GcnInst& inst);
|
||||
void S_MOV_B64(const GcnInst& inst);
|
||||
void S_OR_B64(bool negate, const GcnInst& inst);
|
||||
void S_AND_B64(bool negate, const GcnInst& inst);
|
||||
void S_OR_B64(NegateMode negate, const GcnInst& inst);
|
||||
void S_AND_B64(NegateMode negate, const GcnInst& inst);
|
||||
void S_ADD_I32(const GcnInst& inst);
|
||||
void S_AND_B32(const GcnInst& inst);
|
||||
void S_OR_B32(const GcnInst& inst);
|
||||
|
@ -54,6 +59,8 @@ public:
|
|||
void S_BFM_B32(const GcnInst& inst);
|
||||
void S_NOT_B64(const GcnInst& inst);
|
||||
void S_BREV_B32(const GcnInst& inst);
|
||||
void S_ADD_U32(const GcnInst& inst);
|
||||
void S_SUB_U32(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
|
|
@ -315,8 +315,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
|
||||
inst.SetArg(arg_pos, arg);
|
||||
}
|
||||
if (inst_info.explicit_lod && inst.GetOpcode() == IR::Opcode::ImageFetch) {
|
||||
inst.SetArg(3, arg);
|
||||
if (inst_info.explicit_lod) {
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod);
|
||||
const u32 pos = inst.GetOpcode() == IR::Opcode::ImageFetch ? 3 : 2;
|
||||
inst.SetArg(pos, arg);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -485,7 +485,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
|||
}
|
||||
|
||||
void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
||||
ASSERT_MSG(vqid > 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
||||
ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
|
||||
auto& queue = mapped_queues[vqid];
|
||||
|
||||
const auto& task = ProcessCompute(acb);
|
||||
|
|
|
@ -354,6 +354,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc2UnormBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format16_16 &&
|
||||
num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eR16G16Snorm;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue