Merge branch 'shadps4-emu:main' into main
This commit is contained in:
commit
0f37c903ef
|
@ -103,6 +103,8 @@ if(ENABLE_QT_GUI)
|
|||
find_package(Qt6 REQUIRED COMPONENTS Widgets Concurrent)
|
||||
qt_standard_project_setup()
|
||||
set(CMAKE_AUTORCC ON)
|
||||
set(CMAKE_AUTOMOC ON)
|
||||
set(CMAKE_AUTOUIC ON)
|
||||
endif()
|
||||
|
||||
set(AUDIO_CORE src/audio_core/sdl_audio.cpp
|
||||
|
@ -419,6 +421,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
|
||||
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
||||
src/shader_recompiler/ir/passes/ir_passes.h
|
||||
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
|
||||
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
|
||||
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
|
||||
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
|
||||
|
@ -546,10 +549,13 @@ set(QT_GUI
|
|||
src/qt_gui/elf_viewer.h
|
||||
src/qt_gui/main_window_themes.cpp
|
||||
src/qt_gui/main_window_themes.h
|
||||
src/qt_gui/settings_dialog.cpp
|
||||
src/qt_gui/settings_dialog.h
|
||||
src/qt_gui/settings_dialog.ui
|
||||
src/qt_gui/main.cpp
|
||||
${EMULATOR}
|
||||
${RESOURCE_FILES}
|
||||
)
|
||||
)
|
||||
endif()
|
||||
|
||||
if (ENABLE_QT_GUI)
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 147b2de7734f5dc3b9aeb1f4135ae15fcd44b9d7
|
||||
Subproject commit a04136add1e469f46d8ae8d3e8307779240a5c53
|
|
@ -25,7 +25,9 @@ static bool shouldDumpPM4 = false;
|
|||
static u32 vblankDivider = 1;
|
||||
static bool vkValidation = false;
|
||||
static bool vkValidationSync = false;
|
||||
static bool vkValidationGpu = false;
|
||||
static bool rdocEnable = false;
|
||||
static bool rdocMarkersEnable = false;
|
||||
// Gui
|
||||
std::string settings_install_dir = "";
|
||||
u32 main_window_geometry_x = 400;
|
||||
|
@ -102,6 +104,10 @@ bool isRdocEnabled() {
|
|||
return rdocEnable;
|
||||
}
|
||||
|
||||
bool isMarkersEnabled() {
|
||||
return rdocMarkersEnable;
|
||||
}
|
||||
|
||||
u32 vblankDiv() {
|
||||
return vblankDivider;
|
||||
}
|
||||
|
@ -114,6 +120,78 @@ bool vkValidationSyncEnabled() {
|
|||
return vkValidationSync;
|
||||
}
|
||||
|
||||
bool vkValidationGpuEnabled() {
|
||||
return vkValidationGpu;
|
||||
}
|
||||
|
||||
void setGpuId(s32 selectedGpuId) {
|
||||
gpuId = selectedGpuId;
|
||||
}
|
||||
|
||||
void setScreenWidth(u32 width) {
|
||||
screenWidth = width;
|
||||
}
|
||||
|
||||
void setScreenHeight(u32 height) {
|
||||
screenHeight = height;
|
||||
}
|
||||
|
||||
void setDebugDump(bool enable) {
|
||||
isDebugDump = enable;
|
||||
}
|
||||
|
||||
void setShowSplash(bool enable) {
|
||||
isShowSplash = enable;
|
||||
}
|
||||
|
||||
void setNullGpu(bool enable) {
|
||||
isNullGpu = enable;
|
||||
}
|
||||
|
||||
void setDumpShaders(bool enable) {
|
||||
shouldDumpShaders = enable;
|
||||
}
|
||||
|
||||
void setDumpPM4(bool enable) {
|
||||
shouldDumpPM4 = enable;
|
||||
}
|
||||
|
||||
void setVkValidation(bool enable) {
|
||||
vkValidation = enable;
|
||||
}
|
||||
|
||||
void setVkSyncValidation(bool enable) {
|
||||
vkValidationSync = enable;
|
||||
}
|
||||
|
||||
void setRdocEnabled(bool enable) {
|
||||
rdocEnable = enable;
|
||||
}
|
||||
|
||||
void setVblankDiv(u32 value) {
|
||||
vblankDivider = value;
|
||||
}
|
||||
|
||||
void setFullscreenMode(bool enable) {
|
||||
isFullscreen = enable;
|
||||
}
|
||||
|
||||
void setLanguage(u32 language) {
|
||||
m_language = language;
|
||||
}
|
||||
|
||||
void setNeoMode(bool enable) {
|
||||
isNeo = enable;
|
||||
}
|
||||
|
||||
void setLogType(std::string type) {
|
||||
logType = type;
|
||||
}
|
||||
|
||||
void setLogFilter(std::string type) {
|
||||
logFilter = type;
|
||||
}
|
||||
|
||||
void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) {
|
||||
main_window_geometry_x = x;
|
||||
main_window_geometry_y = y;
|
||||
|
@ -255,7 +333,9 @@ void load(const std::filesystem::path& path) {
|
|||
gpuId = toml::find_or<int>(vk, "gpuId", -1);
|
||||
vkValidation = toml::find_or<bool>(vk, "validation", false);
|
||||
vkValidationSync = toml::find_or<bool>(vk, "validation_sync", false);
|
||||
vkValidationGpu = toml::find_or<bool>(vk, "validation_gpu", true);
|
||||
rdocEnable = toml::find_or<bool>(vk, "rdocEnable", false);
|
||||
rdocMarkersEnable = toml::find_or<bool>(vk, "rdocMarkersEnable", false);
|
||||
}
|
||||
|
||||
if (data.contains("Debug")) {
|
||||
|
@ -330,7 +410,9 @@ void save(const std::filesystem::path& path) {
|
|||
data["Vulkan"]["gpuId"] = gpuId;
|
||||
data["Vulkan"]["validation"] = vkValidation;
|
||||
data["Vulkan"]["validation_sync"] = vkValidationSync;
|
||||
data["Vulkan"]["validation_gpu"] = vkValidationGpu;
|
||||
data["Vulkan"]["rdocEnable"] = rdocEnable;
|
||||
data["Vulkan"]["rdocMarkersEnable"] = rdocMarkersEnable;
|
||||
data["Debug"]["DebugDump"] = isDebugDump;
|
||||
data["LLE"]["libc"] = isLibc;
|
||||
data["GUI"]["theme"] = mw_themes;
|
||||
|
@ -356,4 +438,24 @@ void save(const std::filesystem::path& path) {
|
|||
file << data;
|
||||
file.close();
|
||||
}
|
||||
|
||||
void setDefaultValues() {
|
||||
isNeo = false;
|
||||
isFullscreen = false;
|
||||
screenWidth = 1280;
|
||||
screenHeight = 720;
|
||||
logFilter = "";
|
||||
logType = "async";
|
||||
isDebugDump = false;
|
||||
isShowSplash = false;
|
||||
isNullGpu = false;
|
||||
shouldDumpShaders = false;
|
||||
shouldDumpPM4 = false;
|
||||
vblankDivider = 1;
|
||||
vkValidation = false;
|
||||
rdocEnable = false;
|
||||
m_language = 1;
|
||||
gpuId = -1;
|
||||
}
|
||||
|
||||
} // namespace Config
|
||||
|
|
|
@ -27,10 +27,32 @@ bool nullGpu();
|
|||
bool dumpShaders();
|
||||
bool dumpPM4();
|
||||
bool isRdocEnabled();
|
||||
bool isMarkersEnabled();
|
||||
u32 vblankDiv();
|
||||
|
||||
void setDebugDump(bool enable);
|
||||
void setShowSplash(bool enable);
|
||||
void setNullGpu(bool enable);
|
||||
void setDumpShaders(bool enable);
|
||||
void setDumpPM4(bool enable);
|
||||
void setVblankDiv(u32 value);
|
||||
void setGpuId(s32 selectedGpuId);
|
||||
void setScreenWidth(u32 width);
|
||||
void setScreenHeight(u32 height);
|
||||
void setFullscreenMode(bool enable);
|
||||
void setLanguage(u32 language);
|
||||
void setNeoMode(bool enable);
|
||||
|
||||
void setLogType(std::string type);
|
||||
void setLogFilter(std::string type);
|
||||
|
||||
void setVkValidation(bool enable);
|
||||
void setVkSyncValidation(bool enable);
|
||||
void setRdocEnabled(bool enable);
|
||||
|
||||
bool vkValidationEnabled();
|
||||
bool vkValidationSyncEnabled();
|
||||
bool vkValidationGpuEnabled();
|
||||
|
||||
// Gui
|
||||
void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h);
|
||||
|
@ -64,7 +86,8 @@ std::vector<std::string> getPkgViewer();
|
|||
std::vector<std::string> getElfViewer();
|
||||
std::vector<std::string> getRecentFiles();
|
||||
|
||||
void setDefaultValues();
|
||||
|
||||
// settings
|
||||
u32 GetLanguage();
|
||||
|
||||
}; // namespace Config
|
||||
|
|
|
@ -459,8 +459,28 @@ void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32
|
|||
#endif
|
||||
}
|
||||
|
||||
void AddressSpace::Unmap(VAddr virtual_addr, size_t size, bool has_backing) {
|
||||
return impl->Unmap(virtual_addr, size, has_backing);
|
||||
void AddressSpace::Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma,
|
||||
PAddr phys_base, bool is_exec, bool has_backing) {
|
||||
#ifdef _WIN32
|
||||
// There does not appear to be comparable support for partial unmapping on Windows.
|
||||
// Unfortunately, a least one title was found to require this. The workaround is to unmap
|
||||
// the entire allocation and remap the portions outside of the requested unmapping range.
|
||||
impl->Unmap(virtual_addr, size, has_backing);
|
||||
|
||||
// TODO: Determine if any titles require partial unmapping support for flexible allocations.
|
||||
ASSERT_MSG(has_backing || (start_in_vma == 0 && end_in_vma == size),
|
||||
"Partial unmapping of flexible allocations is not supported");
|
||||
|
||||
if (start_in_vma != 0) {
|
||||
Map(virtual_addr, start_in_vma, 0, phys_base, is_exec);
|
||||
}
|
||||
|
||||
if (end_in_vma != size) {
|
||||
Map(virtual_addr + end_in_vma, size - end_in_vma, 0, phys_base + end_in_vma, is_exec);
|
||||
}
|
||||
#else
|
||||
impl->Unmap(virtual_addr + start_in_vma, end_in_vma - start_in_vma, has_backing);
|
||||
#endif
|
||||
}
|
||||
|
||||
void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission perms) {
|
||||
|
|
|
@ -91,7 +91,8 @@ public:
|
|||
void* MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot, uintptr_t fd);
|
||||
|
||||
/// Unmaps specified virtual memory area.
|
||||
void Unmap(VAddr virtual_addr, size_t size, bool has_backing);
|
||||
void Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma,
|
||||
PAddr phys_base, bool is_exec, bool has_backing);
|
||||
|
||||
void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms);
|
||||
|
||||
|
|
|
@ -54,6 +54,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory)
|
|||
|
||||
// If the path does not exist attempt to verify this.
|
||||
// Retrieve parent path until we find one that exists.
|
||||
std::scoped_lock lk{m_mutex};
|
||||
path_parts.clear();
|
||||
auto current_path = host_path;
|
||||
while (!std::filesystem::exists(current_path)) {
|
||||
|
|
|
@ -235,6 +235,9 @@ int PS4_SYSV_ABI sceAudioOutGetSystemState() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceAudioOutInit() {
|
||||
if (audio != nullptr) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_ALREADY_INIT;
|
||||
}
|
||||
audio = std::make_unique<Audio::SDLAudio>();
|
||||
LOG_INFO(Lib_AudioOut, "called");
|
||||
return ORBIS_OK;
|
||||
|
|
|
@ -956,9 +956,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
|
||||
LOG_DEBUG(Lib_GnmDriver, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
return Config::isNeoMode() ? 911'000'000 : 800'000'000;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
|
||||
|
@ -1706,8 +1706,18 @@ int PS4_SYSV_ABI sceGnmSetupMipStatsReport() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSetVgtControl() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one,
|
||||
u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode) {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
|
||||
if (!cmdbuf || size != 3 || (prim_group_sz_minus_one >= 0x100) ||
|
||||
((wd_switch_only_on_eop_mode | partial_vs_wave_mode) >= 2)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const u32 reg_value =
|
||||
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp();
|
|||
int PS4_SYSV_ABI sceGnmGetEqEventType();
|
||||
int PS4_SYSV_ABI sceGnmGetEqTimeStamp();
|
||||
int PS4_SYSV_ABI sceGnmGetGpuBlockStatus();
|
||||
int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
|
||||
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
|
||||
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus();
|
||||
int PS4_SYSV_ABI sceGnmGetLastWaitedAddress();
|
||||
int PS4_SYSV_ABI sceGnmGetNumTcaUnits();
|
||||
|
@ -161,7 +161,8 @@ int PS4_SYSV_ABI sceGnmSetResourceUserData();
|
|||
int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters();
|
||||
int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance();
|
||||
int PS4_SYSV_ABI sceGnmSetupMipStatsReport();
|
||||
int PS4_SYSV_ABI sceGnmSetVgtControl();
|
||||
s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one,
|
||||
u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode);
|
||||
s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier);
|
||||
int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier();
|
||||
int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers();
|
||||
|
|
|
@ -360,7 +360,6 @@ int PS4_SYSV_ABI posix_connect() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI _sigprocmask() {
|
||||
LOG_DEBUG(Lib_Kernel, "STUBBED");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -75,13 +75,22 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
|
|||
size_t* sizeOut) {
|
||||
LOG_WARNING(Kernel_Vmm, "called searchStart = {:#x}, searchEnd = {:#x}, alignment = {:#x}",
|
||||
searchStart, searchEnd, alignment);
|
||||
|
||||
if (searchEnd <= searchStart) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
if (searchEnd > SCE_KERNEL_MAIN_DMEM_SIZE) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
auto* memory = Core::Memory::Instance();
|
||||
|
||||
PAddr physAddr;
|
||||
s32 size = memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut);
|
||||
s32 result =
|
||||
memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut);
|
||||
*physAddrOut = static_cast<u64>(physAddr);
|
||||
|
||||
return size;
|
||||
return result;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
|
||||
|
@ -244,9 +253,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
|
||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]) {
|
||||
auto* linker = Common::Singleton<Core::Linker>::Instance();
|
||||
linker->SetHeapApiFunc(func);
|
||||
linker->SetHeapAPI(func);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut,
|
||||
|
|
|
@ -102,7 +102,7 @@ int PS4_SYSV_ABI sceKernelMTypeProtect(void* addr, size_t size, int mtype, int p
|
|||
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
|
||||
size_t infoSize);
|
||||
s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut);
|
||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
|
||||
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]);
|
||||
int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut,
|
||||
void** directMemoryStartOut,
|
||||
void** directMemoryEndOut);
|
||||
|
|
|
@ -421,13 +421,21 @@ ScePthreadMutex* createMutex(ScePthreadMutex* addr) {
|
|||
return addr;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* attr,
|
||||
int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* mutex_attr,
|
||||
const char* name) {
|
||||
const ScePthreadMutexattr* attr;
|
||||
|
||||
if (mutex == nullptr) {
|
||||
return SCE_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
if (attr == nullptr) {
|
||||
if (mutex_attr == nullptr) {
|
||||
attr = g_pthread_cxt->getDefaultMutexattr();
|
||||
} else {
|
||||
if (*mutex_attr == nullptr) {
|
||||
attr = g_pthread_cxt->getDefaultMutexattr();
|
||||
} else {
|
||||
attr = mutex_attr;
|
||||
}
|
||||
}
|
||||
|
||||
*mutex = new PthreadMutexInternal{};
|
||||
|
@ -1086,6 +1094,19 @@ int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_
|
|||
return SCE_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI scePthreadAttrSetstack(ScePthreadAttr* attr, void* addr, size_t size) {
|
||||
if (attr == nullptr || *attr == nullptr || addr == nullptr || size < 0x4000) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
int result = pthread_attr_setstack(&(*attr)->pth_attr, addr, size);
|
||||
LOG_INFO(Kernel_Pthread, "scePthreadAttrSetstack: result = {}", result);
|
||||
|
||||
if (result == 0) {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI scePthreadJoin(ScePthread thread, void** res) {
|
||||
int result = pthread_join(thread->pth, res);
|
||||
LOG_INFO(Kernel_Pthread, "scePthreadJoin result = {}", result);
|
||||
|
@ -1542,6 +1563,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("B5GmVDKwpn0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_yield);
|
||||
|
||||
LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack);
|
||||
LIB_FUNCTION("Bvn74vj6oLo", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstack);
|
||||
LIB_FUNCTION("Ru36fiTtJzA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstackaddr);
|
||||
LIB_FUNCTION("-fA+7ZlGDQs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstacksize);
|
||||
LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, scePthreadOnce);
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/kernel/thread_management.h"
|
||||
#include "core/libraries/libs.h"
|
||||
|
||||
namespace Libraries::Kernel {
|
||||
|
@ -82,7 +81,6 @@ public:
|
|||
|
||||
public:
|
||||
struct WaitingThread : public ListBaseHook {
|
||||
std::string name;
|
||||
std::condition_variable cv;
|
||||
u32 priority;
|
||||
s32 need_count;
|
||||
|
@ -90,7 +88,6 @@ public:
|
|||
bool was_cancled{};
|
||||
|
||||
explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} {
|
||||
name = scePthreadSelf()->name;
|
||||
if (is_fifo) {
|
||||
return;
|
||||
}
|
||||
|
@ -174,10 +171,16 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3
|
|||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) {
|
||||
if (!sem) {
|
||||
return ORBIS_KERNEL_ERROR_ESRCH;
|
||||
}
|
||||
return sem->Wait(true, needCount, pTimeout);
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
|
||||
if (!sem) {
|
||||
return ORBIS_KERNEL_ERROR_ESRCH;
|
||||
}
|
||||
if (!sem->Signal(signalCount)) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
@ -185,10 +188,16 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) {
|
|||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) {
|
||||
if (!sem) {
|
||||
return ORBIS_KERNEL_ERROR_ESRCH;
|
||||
}
|
||||
return sem->Wait(false, needCount, nullptr);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) {
|
||||
if (!sem) {
|
||||
return ORBIS_KERNEL_ERROR_ESRCH;
|
||||
}
|
||||
return sem->Cancel(setCount, pNumWaitThreads);
|
||||
}
|
||||
|
||||
|
|
|
@ -974,8 +974,11 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNpGetNpId() {
|
||||
LOG_ERROR(Lib_NpManager, "(STUBBED) called");
|
||||
int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId) {
|
||||
LOG_ERROR(Lib_NpManager, "(DUMMY) called");
|
||||
|
||||
std::string name = "shadps4";
|
||||
strcpy(npId->handle.data, name.c_str());
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -11,6 +11,22 @@ class SymbolsResolver;
|
|||
|
||||
namespace Libraries::NpManager {
|
||||
|
||||
constexpr int ORBIS_NP_ONLINEID_MAX_LENGTH = 16;
|
||||
|
||||
typedef int OrbisUserServiceUserId;
|
||||
|
||||
struct OrbisNpOnlineId {
|
||||
char data[ORBIS_NP_ONLINEID_MAX_LENGTH];
|
||||
char term;
|
||||
char dummy[3];
|
||||
};
|
||||
|
||||
struct OrbisNpId {
|
||||
OrbisNpOnlineId handle;
|
||||
u8 opt[8];
|
||||
u8 reserved[8];
|
||||
};
|
||||
|
||||
int PS4_SYSV_ABI Func_EF4378573542A508();
|
||||
int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromKernel();
|
||||
int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromPool();
|
||||
|
@ -204,7 +220,7 @@ int PS4_SYSV_ABI sceNpGetAccountLanguage2();
|
|||
int PS4_SYSV_ABI sceNpGetAccountLanguageA();
|
||||
int PS4_SYSV_ABI sceNpGetGamePresenceStatus();
|
||||
int PS4_SYSV_ABI sceNpGetGamePresenceStatusA();
|
||||
int PS4_SYSV_ABI sceNpGetNpId();
|
||||
int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId);
|
||||
int PS4_SYSV_ABI sceNpGetNpReachabilityState();
|
||||
int PS4_SYSV_ABI sceNpGetOnlineId();
|
||||
int PS4_SYSV_ABI sceNpGetParentalControlInfo();
|
||||
|
|
|
@ -419,8 +419,14 @@ int PS4_SYSV_ABI scePadSetForceIntercepted() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI scePadSetLightBar(s32 handle, const OrbisPadLightBarParam* pParam) {
|
||||
LOG_ERROR(Lib_Pad, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
if (pParam != nullptr) {
|
||||
LOG_INFO(Lib_Pad, "scePadSetLightBar called handle = {} rgb = {} {} {}", handle, pParam->r,
|
||||
pParam->g, pParam->b);
|
||||
auto* controller = Common::Singleton<Input::GameController>::Instance();
|
||||
controller->SetLightBarRGB(pParam->r, pParam->g, pParam->b);
|
||||
return ORBIS_OK;
|
||||
}
|
||||
return ORBIS_PAD_ERROR_INVALID_ARG;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI scePadSetLightBarBaseBrightness() {
|
||||
|
@ -479,8 +485,14 @@ int PS4_SYSV_ABI scePadSetUserColor() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) {
|
||||
LOG_DEBUG(Lib_Pad, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
if (pParam != nullptr) {
|
||||
LOG_INFO(Lib_Pad, "scePadSetVibration called handle = {} data = {} , {}", handle,
|
||||
pParam->smallMotor, pParam->largeMotor);
|
||||
auto* controller = Common::Singleton<Input::GameController>::Instance();
|
||||
controller->SetVibration(pParam->smallMotor, pParam->largeMotor);
|
||||
return ORBIS_OK;
|
||||
}
|
||||
return ORBIS_PAD_ERROR_INVALID_ARG;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI scePadSetVibrationForce() {
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/kernel/time_management.h"
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "core/platform.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
|
||||
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
|
||||
|
@ -173,14 +174,19 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) {
|
|||
|
||||
// Update flip status.
|
||||
auto* port = req.port;
|
||||
auto& flip_status = port->flip_status;
|
||||
flip_status.count++;
|
||||
flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
|
||||
flip_status.tsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
flip_status.flipArg = req.flip_arg;
|
||||
flip_status.currentBuffer = req.index;
|
||||
flip_status.flipPendingNum = static_cast<int>(requests.size());
|
||||
{
|
||||
std::unique_lock lock{port->port_mutex};
|
||||
auto& flip_status = port->flip_status;
|
||||
flip_status.count++;
|
||||
flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
|
||||
flip_status.tsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
flip_status.flipArg = req.flip_arg;
|
||||
flip_status.currentBuffer = req.index;
|
||||
if (req.eop) {
|
||||
--flip_status.gcQueueNum;
|
||||
}
|
||||
--flip_status.flipPendingNum;
|
||||
}
|
||||
|
||||
// Trigger flip events for the port.
|
||||
for (auto& event : port->flip_events) {
|
||||
|
@ -202,34 +208,54 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) {
|
|||
|
||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
bool is_eop /*= false*/) {
|
||||
{
|
||||
std::unique_lock lock{port->port_mutex};
|
||||
if (index != -1 && port->flip_status.flipPendingNum >= port->NumRegisteredBuffers()) {
|
||||
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_eop) {
|
||||
++port->flip_status.gcQueueNum;
|
||||
}
|
||||
++port->flip_status.flipPendingNum; // integral GPU and CPU pending flips counter
|
||||
port->flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
}
|
||||
|
||||
if (!is_eop) {
|
||||
// Before processing the flip we need to ask GPU thread to flush command list as at this
|
||||
// point VO surface is ready to be presented, and we will need have an actual state of
|
||||
// Vulkan image at the time of frame presentation.
|
||||
liverpool->SendCommand([=, this]() {
|
||||
renderer->FlushDraw();
|
||||
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||
});
|
||||
} else {
|
||||
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
bool is_eop /*= false*/) {
|
||||
Vulkan::Frame* frame;
|
||||
if (index == -1) {
|
||||
frame = renderer->PrepareBlankFrame();
|
||||
frame = renderer->PrepareBlankFrame(is_eop);
|
||||
} else {
|
||||
const auto& buffer = port->buffer_slots[index];
|
||||
const auto& group = port->groups[buffer.group_index];
|
||||
frame = renderer->PrepareFrame(group, buffer.address_left, is_eop);
|
||||
}
|
||||
|
||||
if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) {
|
||||
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
requests.push({
|
||||
.frame = frame,
|
||||
.port = port,
|
||||
.index = index,
|
||||
.flip_arg = flip_arg,
|
||||
.submit_tsc = Libraries::Kernel::sceKernelReadTsc(),
|
||||
.eop = is_eop,
|
||||
});
|
||||
|
||||
port->flip_status.flipPendingNum = static_cast<int>(requests.size());
|
||||
port->flip_status.gcQueueNum = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VideoOutDriver::PresentThread(std::stop_token token) {
|
||||
|
|
|
@ -29,6 +29,7 @@ struct VideoOutPort {
|
|||
std::vector<Kernel::SceKernelEqueue> flip_events;
|
||||
std::vector<Kernel::SceKernelEqueue> vblank_events;
|
||||
std::mutex vo_mutex;
|
||||
std::mutex port_mutex;
|
||||
std::condition_variable vo_cv;
|
||||
std::condition_variable vblank_cv;
|
||||
int flip_rate = 0;
|
||||
|
@ -93,7 +94,6 @@ private:
|
|||
VideoOutPort* port;
|
||||
s32 index;
|
||||
s64 flip_arg;
|
||||
u64 submit_tsc;
|
||||
bool eop;
|
||||
|
||||
operator bool() const noexcept {
|
||||
|
@ -102,6 +102,7 @@ private:
|
|||
};
|
||||
|
||||
std::chrono::microseconds Flip(const Request& req);
|
||||
void SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
|
||||
void PresentThread(std::stop_token token);
|
||||
|
||||
std::mutex mutex;
|
||||
|
|
|
@ -113,7 +113,9 @@ s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate) {
|
|||
|
||||
s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle) {
|
||||
LOG_INFO(Lib_VideoOut, "called");
|
||||
s32 pending = driver->GetPort(handle)->flip_status.flipPendingNum;
|
||||
auto* port = driver->GetPort(handle);
|
||||
std::unique_lock lock{port->port_mutex};
|
||||
s32 pending = port->flip_status.flipPendingNum;
|
||||
return pending;
|
||||
}
|
||||
|
||||
|
@ -161,6 +163,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) {
|
|||
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
std::unique_lock lock{port->port_mutex};
|
||||
*status = port->flip_status;
|
||||
|
||||
LOG_INFO(Lib_VideoOut,
|
||||
|
|
|
@ -305,7 +305,8 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
|
|||
// Module was just loaded by above code. Allocate TLS block for it.
|
||||
Module* module = m_modules[module_index - 1].get();
|
||||
const u32 init_image_size = module->tls.init_image_size;
|
||||
u8* dest = reinterpret_cast<u8*>(heap_api_func(module->tls.image_size));
|
||||
// TODO: Determine if Windows will crash from this
|
||||
u8* dest = reinterpret_cast<u8*>(heap_api->heap_malloc(module->tls.image_size));
|
||||
const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
|
||||
std::memcpy(dest, src, init_image_size);
|
||||
std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size);
|
||||
|
@ -335,10 +336,23 @@ void Linker::InitTlsForThread(bool is_primary) {
|
|||
&addr_out, tls_aligned, 3, 0, "SceKernelPrimaryTcbTls");
|
||||
ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread");
|
||||
} else {
|
||||
if (heap_api_func) {
|
||||
addr_out = heap_api_func(total_tls_size);
|
||||
if (heap_api) {
|
||||
#ifndef WIN32
|
||||
addr_out = heap_api->heap_malloc(total_tls_size);
|
||||
} else {
|
||||
addr_out = std::malloc(total_tls_size);
|
||||
#else
|
||||
// TODO: Windows tls malloc replacement, refer to rtld_tls_block_malloc
|
||||
LOG_ERROR(Core_Linker, "TLS user malloc called, using std::malloc");
|
||||
addr_out = std::malloc(total_tls_size);
|
||||
if (!addr_out) {
|
||||
auto pth_id = pthread_self();
|
||||
auto handle = pthread_gethandle(pth_id);
|
||||
ASSERT_MSG(addr_out,
|
||||
"Cannot allocate TLS block defined for handle=%x, index=%d size=%d",
|
||||
handle, pth_id, total_tls_size);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,7 +46,21 @@ struct EntryParams {
|
|||
const char* argv[3];
|
||||
};
|
||||
|
||||
using HeapApiFunc = PS4_SYSV_ABI void* (*)(size_t);
|
||||
struct HeapAPI {
|
||||
PS4_SYSV_ABI void* (*heap_malloc)(size_t);
|
||||
PS4_SYSV_ABI void (*heap_free)(void*);
|
||||
PS4_SYSV_ABI void* (*heap_calloc)(size_t, size_t);
|
||||
PS4_SYSV_ABI void* (*heap_realloc)(void*, size_t);
|
||||
PS4_SYSV_ABI void* (*heap_memalign)(size_t, size_t);
|
||||
PS4_SYSV_ABI int (*heap_posix_memalign)(void**, size_t, size_t);
|
||||
// NOTE: Fields below may be inaccurate
|
||||
PS4_SYSV_ABI int (*heap_reallocalign)(void);
|
||||
PS4_SYSV_ABI void (*heap_malloc_stats)(void);
|
||||
PS4_SYSV_ABI int (*heap_malloc_stats_fast)(void);
|
||||
PS4_SYSV_ABI size_t (*heap_malloc_usable_size)(void*);
|
||||
};
|
||||
|
||||
using AppHeapAPI = HeapAPI*;
|
||||
|
||||
class Linker {
|
||||
public:
|
||||
|
@ -75,8 +89,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void SetHeapApiFunc(void* func) {
|
||||
heap_api_func = *reinterpret_cast<HeapApiFunc*>(func);
|
||||
void SetHeapAPI(void* func[]) {
|
||||
heap_api = reinterpret_cast<AppHeapAPI>(func);
|
||||
}
|
||||
|
||||
void AdvanceGenerationCounter() noexcept {
|
||||
|
@ -104,7 +118,7 @@ private:
|
|||
size_t static_tls_size{};
|
||||
u32 max_tls_index{};
|
||||
u32 num_static_modules{};
|
||||
HeapApiFunc heap_api_func{};
|
||||
AppHeapAPI heap_api{};
|
||||
std::vector<std::unique_ptr<Module>> m_modules;
|
||||
Loader::SymbolsResolver m_hle_symbols{};
|
||||
};
|
||||
|
|
|
@ -55,7 +55,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
|
|||
free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
|
||||
|
||||
// Add the allocated region to the list and commit its pages.
|
||||
auto& area = CarveDmemArea(free_addr, size);
|
||||
auto& area = CarveDmemArea(free_addr, size)->second;
|
||||
area.memory_type = memory_type;
|
||||
area.is_free = false;
|
||||
return free_addr;
|
||||
|
@ -64,9 +64,8 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
|
|||
void MemoryManager::Free(PAddr phys_addr, size_t size) {
|
||||
std::scoped_lock lk{mutex};
|
||||
|
||||
const auto dmem_area = FindDmemArea(phys_addr);
|
||||
ASSERT(dmem_area != dmem_map.end() && dmem_area->second.base == phys_addr &&
|
||||
dmem_area->second.size == size);
|
||||
auto dmem_area = CarveDmemArea(phys_addr, size);
|
||||
ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size);
|
||||
|
||||
// Release any dmem mappings that reference this physical block.
|
||||
std::vector<std::pair<VAddr, u64>> remove_list;
|
||||
|
@ -75,10 +74,11 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) {
|
|||
continue;
|
||||
}
|
||||
if (mapping.phys_base <= phys_addr && phys_addr < mapping.phys_base + mapping.size) {
|
||||
LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", addr,
|
||||
mapping.size);
|
||||
auto vma_segment_start_addr = phys_addr - mapping.phys_base + addr;
|
||||
LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}",
|
||||
vma_segment_start_addr, size);
|
||||
// Unmaping might erase from vma_map. We can't do it here.
|
||||
remove_list.emplace_back(addr, mapping.size);
|
||||
remove_list.emplace_back(vma_segment_start_addr, size);
|
||||
}
|
||||
}
|
||||
for (const auto& [addr, size] : remove_list) {
|
||||
|
@ -105,8 +105,6 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
|
|||
const auto& vma = FindVMA(mapped_addr)->second;
|
||||
// If the VMA is mapped, unmap the region first.
|
||||
if (vma.IsMapped()) {
|
||||
ASSERT_MSG(vma.base == mapped_addr && vma.size == size,
|
||||
"Region must match when reserving a mapped region");
|
||||
UnmapMemory(mapped_addr, size);
|
||||
}
|
||||
const size_t remaining_size = vma.base + vma.size - mapped_addr;
|
||||
|
@ -170,6 +168,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
|
|||
new_vma.prot = prot;
|
||||
new_vma.name = name;
|
||||
new_vma.type = type;
|
||||
new_vma.is_exec = is_exec;
|
||||
|
||||
if (type == VMAType::Direct) {
|
||||
new_vma.phys_base = phys_addr;
|
||||
|
@ -217,10 +216,16 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
|
|||
std::scoped_lock lk{mutex};
|
||||
|
||||
const auto it = FindVMA(virtual_addr);
|
||||
ASSERT_MSG(it->second.Contains(virtual_addr, size),
|
||||
const auto& vma_base = it->second;
|
||||
ASSERT_MSG(vma_base.Contains(virtual_addr, size),
|
||||
"Existing mapping does not contain requested unmap range");
|
||||
|
||||
const auto type = it->second.type;
|
||||
const auto vma_base_addr = vma_base.base;
|
||||
const auto vma_base_size = vma_base.size;
|
||||
const auto phys_base = vma_base.phys_base;
|
||||
const bool is_exec = vma_base.is_exec;
|
||||
const auto start_in_vma = virtual_addr - vma_base_addr;
|
||||
const auto type = vma_base.type;
|
||||
const bool has_backing = type == VMAType::Direct || type == VMAType::File;
|
||||
if (type == VMAType::Direct) {
|
||||
rasterizer->UnmapMemory(virtual_addr, size);
|
||||
|
@ -240,7 +245,8 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
|
|||
MergeAdjacent(vma_map, new_it);
|
||||
|
||||
// Unmap the memory region.
|
||||
impl.Unmap(virtual_addr, size, has_backing);
|
||||
impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec,
|
||||
has_backing);
|
||||
TRACK_FREE(virtual_addr, "VMEM");
|
||||
}
|
||||
|
||||
|
@ -364,10 +370,10 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
|
|||
std::scoped_lock lk{mutex};
|
||||
|
||||
auto it = FindVMA(addr);
|
||||
if (!it->second.IsMapped() && flags == 1) {
|
||||
if (it->second.type == VMAType::Free && flags == 1) {
|
||||
it++;
|
||||
}
|
||||
if (!it->second.IsMapped()) {
|
||||
if (it->second.type == VMAType::Free) {
|
||||
LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region");
|
||||
return ORBIS_KERNEL_ERROR_EACCES;
|
||||
}
|
||||
|
@ -494,13 +500,12 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size
|
|||
return vma_handle;
|
||||
}
|
||||
|
||||
DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
|
||||
MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
|
||||
auto dmem_handle = FindDmemArea(addr);
|
||||
ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map");
|
||||
|
||||
const DirectMemoryArea& area = dmem_handle->second;
|
||||
ASSERT_MSG(area.is_free && area.base <= addr,
|
||||
"Adding an allocation to already allocated region");
|
||||
ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region");
|
||||
|
||||
const PAddr start_in_area = addr - area.base;
|
||||
const PAddr end_in_vma = start_in_area + size;
|
||||
|
@ -515,7 +520,7 @@ DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
|
|||
dmem_handle = Split(dmem_handle, start_in_area);
|
||||
}
|
||||
|
||||
return dmem_handle->second;
|
||||
return dmem_handle;
|
||||
}
|
||||
|
||||
MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) {
|
||||
|
|
|
@ -84,6 +84,7 @@ struct VirtualMemoryArea {
|
|||
bool disallow_merge = false;
|
||||
std::string name = "";
|
||||
uintptr_t fd = 0;
|
||||
bool is_exec = false;
|
||||
|
||||
bool Contains(VAddr addr, size_t size) const {
|
||||
return addr >= base && (addr + size) <= (base + this->size);
|
||||
|
@ -210,7 +211,7 @@ private:
|
|||
|
||||
VMAHandle CarveVMA(VAddr virtual_addr, size_t size);
|
||||
|
||||
DirectMemoryArea& CarveDmemArea(PAddr addr, size_t size);
|
||||
DMemHandle CarveDmemArea(PAddr addr, size_t size);
|
||||
|
||||
VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma);
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <SDL3/SDL.h>
|
||||
#include "core/libraries/kernel/time_management.h"
|
||||
#include "core/libraries/pad/pad.h"
|
||||
#include "input/controller.h"
|
||||
|
@ -117,4 +118,29 @@ void GameController::Axis(int id, Input::Axis axis, int value) {
|
|||
AddState(state);
|
||||
}
|
||||
|
||||
void GameController::SetLightBarRGB(u8 r, u8 g, u8 b) {
|
||||
if (m_sdl_gamepad != nullptr) {
|
||||
SDL_SetGamepadLED(m_sdl_gamepad, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
bool GameController::SetVibration(u8 smallMotor, u8 largeMotor) {
|
||||
if (m_sdl_gamepad != nullptr) {
|
||||
return SDL_RumbleGamepad(m_sdl_gamepad, (smallMotor / 255.0f) * 0xFFFF,
|
||||
(largeMotor / 255.0f) * 0xFFFF, -1) == 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void GameController::TryOpenSDLController() {
|
||||
if (m_sdl_gamepad == nullptr || !SDL_GamepadConnected(m_sdl_gamepad)) {
|
||||
int gamepad_count;
|
||||
SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count);
|
||||
m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr;
|
||||
SDL_free(gamepads);
|
||||
}
|
||||
|
||||
SetLightBarRGB(0, 0, 255);
|
||||
}
|
||||
|
||||
} // namespace Input
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include <mutex>
|
||||
#include "common/types.h"
|
||||
|
||||
struct SDL_Gamepad;
|
||||
|
||||
namespace Input {
|
||||
|
||||
enum class Axis {
|
||||
|
@ -43,6 +45,9 @@ public:
|
|||
void CheckButton(int id, u32 button, bool isPressed);
|
||||
void AddState(const State& state);
|
||||
void Axis(int id, Input::Axis axis, int value);
|
||||
void SetLightBarRGB(u8 r, u8 g, u8 b);
|
||||
bool SetVibration(u8 smallMotor, u8 largeMotor);
|
||||
void TryOpenSDLController();
|
||||
|
||||
private:
|
||||
struct StateInternal {
|
||||
|
@ -57,6 +62,8 @@ private:
|
|||
u32 m_first_state = 0;
|
||||
std::array<State, MAX_STATES> m_states;
|
||||
std::array<StateInternal, MAX_STATES> m_private;
|
||||
|
||||
SDL_Gamepad* m_sdl_gamepad = nullptr;
|
||||
};
|
||||
|
||||
} // namespace Input
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#include "core/loader.h"
|
||||
#include "game_install_dialog.h"
|
||||
#include "main_window.h"
|
||||
#include "settings_dialog.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWindow) {
|
||||
ui->setupUi(this);
|
||||
|
@ -38,6 +40,7 @@ bool MainWindow::Init() {
|
|||
CreateConnects();
|
||||
SetLastUsedTheme();
|
||||
SetLastIconSizeBullet();
|
||||
GetPhysicalDevices();
|
||||
// show ui
|
||||
setMinimumSize(350, minimumSizeHint().height());
|
||||
setWindowTitle(QString::fromStdString("shadPS4 v" + std::string(Common::VERSION)));
|
||||
|
@ -157,6 +160,19 @@ void MainWindow::LoadGameLists() {
|
|||
}
|
||||
}
|
||||
|
||||
void MainWindow::GetPhysicalDevices() {
|
||||
Vulkan::Instance instance(false, false);
|
||||
auto physical_devices = instance.GetPhysicalDevices();
|
||||
for (const vk::PhysicalDevice physical_device : physical_devices) {
|
||||
auto prop = physical_device.getProperties();
|
||||
QString name = QString::fromUtf8(prop.deviceName, -1);
|
||||
if (prop.apiVersion < Vulkan::TargetVulkanApiVersion) {
|
||||
name += " * Unsupported Vulkan Version";
|
||||
}
|
||||
m_physical_devices.push_back(name);
|
||||
}
|
||||
}
|
||||
|
||||
void MainWindow::CreateConnects() {
|
||||
connect(this, &MainWindow::WindowResized, this, &MainWindow::HandleResize);
|
||||
connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable);
|
||||
|
@ -185,6 +201,11 @@ void MainWindow::CreateConnects() {
|
|||
connect(m_game_list_frame.get(), &QTableWidget::cellDoubleClicked, this,
|
||||
&MainWindow::StartGame);
|
||||
|
||||
connect(ui->settingsButton, &QPushButton::clicked, this, [this]() {
|
||||
auto settingsDialog = new SettingsDialog(m_physical_devices, this);
|
||||
settingsDialog->exec();
|
||||
});
|
||||
|
||||
connect(ui->setIconSizeTinyAct, &QAction::triggered, this, [this]() {
|
||||
if (isTableList) {
|
||||
m_game_list_frame->icon_size =
|
||||
|
|
|
@ -54,6 +54,7 @@ private:
|
|||
void CreateActions();
|
||||
void CreateRecentGameActions();
|
||||
void CreateDockWindows();
|
||||
void GetPhysicalDevices();
|
||||
void LoadGameLists();
|
||||
void CreateConnects();
|
||||
void SetLastUsedTheme();
|
||||
|
@ -79,6 +80,8 @@ private:
|
|||
QScopedPointer<ElfViewer> m_elf_viewer;
|
||||
// Status Bar.
|
||||
QScopedPointer<QStatusBar> statusBar;
|
||||
// Available GPU devices
|
||||
std::vector<QString> m_physical_devices;
|
||||
|
||||
PSF psf;
|
||||
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "settings_dialog.h"
|
||||
#include "ui_settings_dialog.h"
|
||||
|
||||
SettingsDialog::SettingsDialog(std::span<const QString> physical_devices, QWidget* parent)
|
||||
: QDialog(parent), ui(new Ui::SettingsDialog) {
|
||||
ui->setupUi(this);
|
||||
ui->tabWidgetSettings->setUsesScrollButtons(false);
|
||||
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
|
||||
|
||||
ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus();
|
||||
|
||||
// Add list of available GPUs
|
||||
ui->graphicsAdapterBox->addItem("Auto Select"); // -1, auto selection
|
||||
for (const auto& device : physical_devices) {
|
||||
ui->graphicsAdapterBox->addItem(device);
|
||||
}
|
||||
|
||||
LoadValuesFromConfig();
|
||||
|
||||
connect(ui->buttonBox, &QDialogButtonBox::rejected, this, &QWidget::close);
|
||||
|
||||
connect(ui->buttonBox, &QDialogButtonBox::clicked, this,
|
||||
[this, config_dir](QAbstractButton* button) {
|
||||
if (button == ui->buttonBox->button(QDialogButtonBox::Save)) {
|
||||
Config::save(config_dir / "config.toml");
|
||||
QWidget::close();
|
||||
} else if (button == ui->buttonBox->button(QDialogButtonBox::Apply)) {
|
||||
Config::save(config_dir / "config.toml");
|
||||
} else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) {
|
||||
Config::setDefaultValues();
|
||||
LoadValuesFromConfig();
|
||||
}
|
||||
});
|
||||
|
||||
connect(ui->tabWidgetSettings, &QTabWidget::currentChanged, this, [this]() {
|
||||
ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus();
|
||||
});
|
||||
|
||||
// EMULATOR TAB
|
||||
{
|
||||
connect(ui->consoleLanguageComboBox, &QComboBox::currentIndexChanged, this,
|
||||
[](int index) { Config::setLanguage(index); });
|
||||
}
|
||||
|
||||
// GPU TAB
|
||||
{
|
||||
// First options is auto selection -1, so gpuId on the GUI will always have to subtract 1
|
||||
// when setting and add 1 when getting to select the correct gpu in Qt
|
||||
connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this,
|
||||
[](int index) { Config::setGpuId(index - 1); });
|
||||
|
||||
connect(ui->widthSpinBox, &QSpinBox::valueChanged, this,
|
||||
[](int val) { Config::setScreenWidth(val); });
|
||||
|
||||
connect(ui->heightSpinBox, &QSpinBox::valueChanged, this,
|
||||
[](int val) { Config::setScreenHeight(val); });
|
||||
|
||||
connect(ui->vblankSpinBox, &QSpinBox::valueChanged, this,
|
||||
[](int val) { Config::setVblankDiv(val); });
|
||||
|
||||
connect(ui->dumpShadersCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setDumpShaders(val); });
|
||||
|
||||
connect(ui->nullGpuCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setNullGpu(val); });
|
||||
|
||||
connect(ui->dumpPM4CheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setDumpPM4(val); });
|
||||
}
|
||||
|
||||
// GENERAL TAB
|
||||
{
|
||||
connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setFullscreenMode(val); });
|
||||
|
||||
connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setShowSplash(val); });
|
||||
|
||||
connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setNeoMode(val); });
|
||||
|
||||
connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this,
|
||||
[](const QString& text) { Config::setLogType(text.toStdString()); });
|
||||
|
||||
connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this,
|
||||
[](const QString& text) { Config::setLogFilter(text.toStdString()); });
|
||||
}
|
||||
|
||||
// DEBUG TAB
|
||||
{
|
||||
connect(ui->debugDump, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setDebugDump(val); });
|
||||
|
||||
connect(ui->vkValidationCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setVkValidation(val); });
|
||||
|
||||
connect(ui->vkSyncValidationCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setVkSyncValidation(val); });
|
||||
|
||||
connect(ui->rdocCheckBox, &QCheckBox::stateChanged, this,
|
||||
[](int val) { Config::setRdocEnabled(val); });
|
||||
}
|
||||
}
|
||||
|
||||
void SettingsDialog::LoadValuesFromConfig() {
|
||||
ui->consoleLanguageComboBox->setCurrentIndex(Config::GetLanguage());
|
||||
|
||||
ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1);
|
||||
ui->widthSpinBox->setValue(Config::getScreenWidth());
|
||||
ui->heightSpinBox->setValue(Config::getScreenHeight());
|
||||
ui->vblankSpinBox->setValue(Config::vblankDiv());
|
||||
ui->dumpShadersCheckBox->setChecked(Config::dumpShaders());
|
||||
ui->nullGpuCheckBox->setChecked(Config::nullGpu());
|
||||
ui->dumpPM4CheckBox->setChecked(Config::dumpPM4());
|
||||
|
||||
ui->fullscreenCheckBox->setChecked(Config::isFullscreenMode());
|
||||
ui->showSplashCheckBox->setChecked(Config::showSplash());
|
||||
ui->ps4proCheckBox->setChecked(Config::isNeoMode());
|
||||
ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType()));
|
||||
ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter()));
|
||||
|
||||
ui->debugDump->setChecked(Config::debugDump());
|
||||
ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled());
|
||||
ui->vkSyncValidationCheckBox->setChecked(Config::vkValidationSyncEnabled());
|
||||
ui->rdocCheckBox->setChecked(Config::isRdocEnabled());
|
||||
}
|
||||
|
||||
int SettingsDialog::exec() {
|
||||
return QDialog::exec();
|
||||
}
|
||||
|
||||
SettingsDialog::~SettingsDialog() {}
|
|
@ -0,0 +1,29 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <QDialog>
|
||||
#include <QPushButton>
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/path_util.h"
|
||||
|
||||
namespace Ui {
|
||||
class SettingsDialog;
|
||||
}
|
||||
|
||||
class SettingsDialog : public QDialog {
|
||||
Q_OBJECT
|
||||
public:
|
||||
explicit SettingsDialog(std::span<const QString> physical_devices, QWidget* parent = nullptr);
|
||||
~SettingsDialog();
|
||||
|
||||
int exec() override;
|
||||
|
||||
private:
|
||||
void LoadValuesFromConfig();
|
||||
|
||||
std::unique_ptr<Ui::SettingsDialog> ui;
|
||||
};
|
|
@ -0,0 +1,908 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
SPDX-License-Identifier: GPL-2.0-or-later -->
|
||||
|
||||
<ui version="4.0">
|
||||
<class>SettingsDialog</class>
|
||||
<widget class="QDialog" name="SettingsDialog">
|
||||
<property name="windowModality">
|
||||
<enum>Qt::WindowModality::WindowModal</enum>
|
||||
</property>
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>1024</width>
|
||||
<height>768</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
<string>Settings</string>
|
||||
</property>
|
||||
<property name="windowIcon">
|
||||
<iconset>
|
||||
<normaloff>:/images/shadps4.ico</normaloff>:/images/shadps4.ico</iconset>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="settingsDialogLayout">
|
||||
<item>
|
||||
<widget class="QScrollArea" name="scrollArea">
|
||||
<property name="frameShape">
|
||||
<enum>QFrame::Shape::NoFrame</enum>
|
||||
</property>
|
||||
<property name="widgetResizable">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<widget class="QTabWidget" name="tabWidgetSettings">
|
||||
<property name="enabled">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>1006</width>
|
||||
<height>720</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="currentIndex">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<widget class="QWidget" name="emulatorTab">
|
||||
<attribute name="title">
|
||||
<string>Emulator</string>
|
||||
</attribute>
|
||||
<layout class="QVBoxLayout" name="emulatorTabVLayout" stretch="0,0">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="emulatorTabHLayout" stretch="1,1,1">
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="emulatorTabLayoutLeft">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="consoleLanguageGroupBox">
|
||||
<property name="title">
|
||||
<string>Console Language</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="settingsLayout">
|
||||
<item>
|
||||
<widget class="QComboBox" name="consoleLanguageComboBox">
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Japanese</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>English (United States)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>French (France)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Spanish (Spain)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>German</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Italian</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Dutch</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Portuguese (Portugal)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Russian</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Korean</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Traditional Chinese</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Simplified Chinese</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Finnish</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Swedish</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Danish</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Norwegian</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Polish</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Portuguese (Brazil)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>English (United Kingdom)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Turkish</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Spanish (Latin America)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Arabic</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>French (Canada)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Czech</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Hungarian</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Greek</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Romanian</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Thai</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Vietnamese</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Indonesian</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QWidget" name="widgetSettingsTop" native="true">
|
||||
<layout class="QHBoxLayout" name="widgetGpuTopLayout">
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="topMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="rightMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QWidget" name="widgetSettingsBottom" native="true">
|
||||
<layout class="QHBoxLayout" name="widgetGpuBottomLayout">
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="topMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="rightMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="emulator_tab_layout_right_spacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="emulatorTabLayoutMiddle_2"/>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="emulatorTabLayoutRight">
|
||||
<property name="rightMargin">
|
||||
<number>12</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>12</number>
|
||||
</property>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="emulatorTabSpacer_2">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="gpuTab">
|
||||
<attribute name="title">
|
||||
<string>GPU</string>
|
||||
</attribute>
|
||||
<layout class="QVBoxLayout" name="gpuTabVLayout" stretch="0,0">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="gpuTabHLayout" stretch="1,1,1">
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="gpuTabLayoutLeft">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="graphicsAdapterGroupBox">
|
||||
<property name="title">
|
||||
<string>Graphics Device</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="graphicsAdapterLayout">
|
||||
<item>
|
||||
<widget class="QComboBox" name="graphicsAdapterBox"/>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QWidget" name="widgetGpuTop" native="true">
|
||||
<layout class="QHBoxLayout" name="widgetGpuTopHLayout">
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="topMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="rightMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QWidget" name="widgetGpuBottom" native="true">
|
||||
<layout class="QHBoxLayout" name="widgetGpuBottomHLayout">
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="topMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="rightMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="gpu_tab_layout_right_spacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="gpuTabLayoutMiddle">
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="layoutResolution">
|
||||
<property name="spacing">
|
||||
<number>6</number>
|
||||
</property>
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="resolutionLayout">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="widthGroupBox">
|
||||
<property name="title">
|
||||
<string>Width</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="widthLayout">
|
||||
<item>
|
||||
<widget class="QSpinBox" name="widthSpinBox">
|
||||
<property name="accelerated">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="correctionMode">
|
||||
<enum>QAbstractSpinBox::CorrectionMode::CorrectToNearestValue</enum>
|
||||
</property>
|
||||
<property name="keyboardTracking">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<number>9999</number>
|
||||
</property>
|
||||
<property name="value">
|
||||
<number>1280</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="heightGroupBox">
|
||||
<property name="title">
|
||||
<string>Height</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="heightLayout">
|
||||
<item>
|
||||
<widget class="QSpinBox" name="heightSpinBox">
|
||||
<property name="frame">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="accelerated">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="correctionMode">
|
||||
<enum>QAbstractSpinBox::CorrectionMode::CorrectToNearestValue</enum>
|
||||
</property>
|
||||
<property name="keyboardTracking">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<number>9999</number>
|
||||
</property>
|
||||
<property name="value">
|
||||
<number>720</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="vLayoutVblank">
|
||||
<property name="spacing">
|
||||
<number>6</number>
|
||||
</property>
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="hLayoutVblank">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="heightDivider">
|
||||
<property name="title">
|
||||
<string>Vblank Divider</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="vblankLayout">
|
||||
<item>
|
||||
<widget class="QSpinBox" name="vblankSpinBox">
|
||||
<property name="frame">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="accelerated">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="correctionMode">
|
||||
<enum>QAbstractSpinBox::CorrectionMode::CorrectToNearestValue</enum>
|
||||
</property>
|
||||
<property name="keyboardTracking">
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<number>9999</number>
|
||||
</property>
|
||||
<property name="value">
|
||||
<number>1</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="gpuTabLayoutMiddleSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="gpuTabLayoutRight">
|
||||
<property name="rightMargin">
|
||||
<number>12</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>12</number>
|
||||
</property>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="additionalSettingsGroupBox">
|
||||
<property name="title">
|
||||
<string>Additional Settings</string>
|
||||
</property>
|
||||
<property name="alignment">
|
||||
<set>Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignVCenter</set>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="additionalSettingsLayout">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="dumpShadersCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable Shaders Dumping</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="nullGpuCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable NULL GPU</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="dumpPM4CheckBox">
|
||||
<property name="text">
|
||||
<string>Enable PM4 Dumping</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="additionalSettingsSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="gpuTabSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="generalTab">
|
||||
<attribute name="title">
|
||||
<string>General</string>
|
||||
</attribute>
|
||||
<layout class="QVBoxLayout" name="generalTabVLayout" stretch="0,1">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="generalTabHLayout" stretch="1,1,1">
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="generalTabLayoutLeft">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="emuSettings">
|
||||
<property name="title">
|
||||
<string>Emulator Settings</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="emuSettingsLayout">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="fullscreenCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable Fullscreen</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="showSplashCheckBox">
|
||||
<property name="text">
|
||||
<string>Show Splash</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="ps4proCheckBox">
|
||||
<property name="text">
|
||||
<string>Is PS4 Pro</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="emulatorTabSpacerLeft">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="emulatorTabLayoutMiddle">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="loggerGroupBox">
|
||||
<property name="title">
|
||||
<string>Logger Settings</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="loggerLayout">
|
||||
<item>
|
||||
<widget class="QWidget" name="LogTypeWidget" native="true">
|
||||
<layout class="QVBoxLayout" name="LogTypeLayout">
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="topMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="rightMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<property name="bottomMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="logTypeGroupBox">
|
||||
<property name="title">
|
||||
<string>Log Type</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="logTypeBoxLayout">
|
||||
<item>
|
||||
<widget class="QComboBox" name="logTypeComboBox">
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>async</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>sync</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="vLayoutLogFilter">
|
||||
<property name="spacing">
|
||||
<number>6</number>
|
||||
</property>
|
||||
<property name="leftMargin">
|
||||
<number>0</number>
|
||||
</property>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="hLayoutLogFilter">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="logFilter">
|
||||
<property name="title">
|
||||
<string>Log Filter</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="logFilterLayout">
|
||||
<item>
|
||||
<widget class="QLineEdit" name="logFilterLineEdit"/>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="generalTabLayoutRight">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="additionalSettings">
|
||||
<property name="title">
|
||||
<string>Additional Settings</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="additionalSettingsVLayout">
|
||||
<item>
|
||||
<spacer name="emulatorTabSpacerRight">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="emulatorTabSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="debugTab">
|
||||
<attribute name="title">
|
||||
<string>Debug</string>
|
||||
</attribute>
|
||||
<layout class="QVBoxLayout" name="debugTabVLayout" stretch="0,1">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="debugTabHLayout" stretch="1">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="debugTabGroupBox">
|
||||
<property name="enabled">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="title">
|
||||
<string>General</string>
|
||||
</property>
|
||||
<property name="alignment">
|
||||
<set>Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop</set>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="debugTabLayout">
|
||||
<item alignment="Qt::AlignmentFlag::AlignTop">
|
||||
<widget class="QCheckBox" name="debugDump">
|
||||
<property name="text">
|
||||
<string>Enable Debug Dumping</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="vkValidationCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable Vulkan Validation Layers</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="vkSyncValidationCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable Vulkan Synchronization Validation</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="rdocCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable RenderDoc Debugging</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="debugTabSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Orientation::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeType">
|
||||
<enum>QSizePolicy::Policy::MinimumExpanding</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</widget>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QDialogButtonBox" name="buttonBox">
|
||||
<property name="standardButtons">
|
||||
<set>QDialogButtonBox::StandardButton::Apply|QDialogButtonBox::StandardButton::Close|QDialogButtonBox::StandardButton::RestoreDefaults|QDialogButtonBox::StandardButton::Save</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<resources/>
|
||||
<connections/>
|
||||
</ui>
|
|
@ -43,6 +43,9 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_
|
|||
|
||||
SDL_SetWindowFullscreen(window, Config::isFullscreenMode());
|
||||
|
||||
SDL_InitSubSystem(SDL_INIT_GAMEPAD);
|
||||
controller->TryOpenSDLController();
|
||||
|
||||
#if defined(SDL_PLATFORM_WIN32)
|
||||
window_info.type = WindowSystemType::Windows;
|
||||
window_info.render_surface = SDL_GetPointerProperty(SDL_GetWindowProperties(window),
|
||||
|
@ -92,6 +95,11 @@ void WindowSDL::waitEvent() {
|
|||
case SDL_EVENT_KEY_UP:
|
||||
onKeyPress(&event);
|
||||
break;
|
||||
case SDL_EVENT_GAMEPAD_BUTTON_DOWN:
|
||||
case SDL_EVENT_GAMEPAD_BUTTON_UP:
|
||||
case SDL_EVENT_GAMEPAD_AXIS_MOTION:
|
||||
onGamepadEvent(&event);
|
||||
break;
|
||||
case SDL_EVENT_QUIT:
|
||||
is_open = false;
|
||||
break;
|
||||
|
@ -276,4 +284,71 @@ void WindowSDL::onKeyPress(const SDL_Event* event) {
|
|||
}
|
||||
}
|
||||
|
||||
void WindowSDL::onGamepadEvent(const SDL_Event* event) {
|
||||
using Libraries::Pad::OrbisPadButtonDataOffset;
|
||||
|
||||
u32 button = 0;
|
||||
Input::Axis axis = Input::Axis::AxisMax;
|
||||
switch (event->type) {
|
||||
case SDL_EVENT_GAMEPAD_BUTTON_DOWN:
|
||||
case SDL_EVENT_GAMEPAD_BUTTON_UP:
|
||||
button = sdlGamepadToOrbisButton(event->gbutton.button);
|
||||
if (button != 0) {
|
||||
controller->CheckButton(0, button, event->type == SDL_EVENT_GAMEPAD_BUTTON_DOWN);
|
||||
}
|
||||
break;
|
||||
case SDL_EVENT_GAMEPAD_AXIS_MOTION:
|
||||
axis = event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTX ? Input::Axis::LeftX
|
||||
: event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTY ? Input::Axis::LeftY
|
||||
: event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTX ? Input::Axis::RightX
|
||||
: event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTY ? Input::Axis::RightY
|
||||
: event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFT_TRIGGER ? Input::Axis::TriggerLeft
|
||||
: event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHT_TRIGGER ? Input::Axis::TriggerRight
|
||||
: Input::Axis::AxisMax;
|
||||
if (axis != Input::Axis::AxisMax) {
|
||||
controller->Axis(0, axis, Input::GetAxis(-0x8000, 0x8000, event->gaxis.value));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int WindowSDL::sdlGamepadToOrbisButton(u8 button) {
|
||||
using Libraries::Pad::OrbisPadButtonDataOffset;
|
||||
|
||||
switch (button) {
|
||||
case SDL_GAMEPAD_BUTTON_DPAD_DOWN:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_DOWN;
|
||||
case SDL_GAMEPAD_BUTTON_DPAD_UP:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_UP;
|
||||
case SDL_GAMEPAD_BUTTON_DPAD_LEFT:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_LEFT;
|
||||
case SDL_GAMEPAD_BUTTON_DPAD_RIGHT:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_RIGHT;
|
||||
case SDL_GAMEPAD_BUTTON_SOUTH:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CROSS;
|
||||
case SDL_GAMEPAD_BUTTON_NORTH:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TRIANGLE;
|
||||
case SDL_GAMEPAD_BUTTON_WEST:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_SQUARE;
|
||||
case SDL_GAMEPAD_BUTTON_EAST:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CIRCLE;
|
||||
case SDL_GAMEPAD_BUTTON_START:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_OPTIONS;
|
||||
case SDL_GAMEPAD_BUTTON_TOUCHPAD:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD;
|
||||
case SDL_GAMEPAD_BUTTON_BACK:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD;
|
||||
case SDL_GAMEPAD_BUTTON_LEFT_SHOULDER:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L1;
|
||||
case SDL_GAMEPAD_BUTTON_RIGHT_SHOULDER:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R1;
|
||||
case SDL_GAMEPAD_BUTTON_LEFT_STICK:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L3;
|
||||
case SDL_GAMEPAD_BUTTON_RIGHT_STICK:
|
||||
return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R3;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Frontend
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "common/types.h"
|
||||
|
||||
struct SDL_Window;
|
||||
struct SDL_Gamepad;
|
||||
union SDL_Event;
|
||||
|
||||
namespace Input {
|
||||
|
@ -66,6 +67,9 @@ public:
|
|||
private:
|
||||
void onResize();
|
||||
void onKeyPress(const SDL_Event* event);
|
||||
void onGamepadEvent(const SDL_Event* event);
|
||||
|
||||
int sdlGamepadToOrbisButton(u8 button);
|
||||
|
||||
private:
|
||||
s32 width;
|
||||
|
|
|
@ -128,11 +128,7 @@ Id EmitReadConst(EmitContext& ctx) {
|
|||
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
|
||||
}
|
||||
const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))};
|
||||
index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords);
|
||||
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
|
||||
}
|
||||
|
@ -218,6 +214,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
}
|
||||
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
|
||||
if (attr == IR::Attribute::Position1) {
|
||||
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
|
||||
return;
|
||||
}
|
||||
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
||||
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
|
||||
}
|
||||
|
@ -229,9 +229,6 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
|||
template <u32 N>
|
||||
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
if constexpr (N == 1) {
|
||||
|
@ -386,19 +383,12 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
|
|||
if (is_signed) {
|
||||
value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
|
||||
ctx.ConstU32(bit_width));
|
||||
value = ctx.OpConvertSToF(ctx.F32[1], value);
|
||||
} else {
|
||||
value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
|
||||
ctx.ConstU32(bit_width));
|
||||
value = ctx.OpConvertUToF(ctx.F32[1], value);
|
||||
}
|
||||
} else {
|
||||
if (is_signed) {
|
||||
value = ctx.OpConvertSToF(ctx.F32[1], value);
|
||||
} else {
|
||||
value = ctx.OpConvertUToF(ctx.F32[1], value);
|
||||
}
|
||||
}
|
||||
value = ctx.OpBitcast(ctx.F32[1], value);
|
||||
return ConvertValue(ctx, value, num_format, bit_width);
|
||||
}
|
||||
break;
|
||||
|
@ -411,9 +401,6 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
|
|||
template <u32 N>
|
||||
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
if constexpr (N == 1) {
|
||||
return GetBufferFormatValue(ctx, handle, address, 0);
|
||||
|
@ -445,9 +432,6 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad
|
|||
template <u32 N>
|
||||
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
if constexpr (N == 1) {
|
||||
|
@ -483,4 +467,96 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
|
|||
EmitStoreBufferF32xN<1>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
|
||||
u32 bit_width) {
|
||||
switch (format) {
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
return ctx.OpConvertFToU(
|
||||
ctx.U32[1], ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width)))));
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return ctx.OpBitcast(ctx.U32[1], value);
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
return value;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
|
||||
magic_enum::enum_name(format));
|
||||
}
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
const auto format = buffer.buffer.GetDataFmt();
|
||||
const auto num_format = buffer.buffer.GetNumberFmt();
|
||||
|
||||
switch (format) {
|
||||
case AmdGpu::DataFormat::FormatInvalid:
|
||||
return;
|
||||
case AmdGpu::DataFormat::Format8_8_8_8:
|
||||
case AmdGpu::DataFormat::Format16:
|
||||
case AmdGpu::DataFormat::Format32:
|
||||
case AmdGpu::DataFormat::Format32_32_32_32: {
|
||||
ASSERT(N == AmdGpu::NumComponents(format));
|
||||
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
|
||||
|
||||
Id packed_value{};
|
||||
for (u32 i = 0; i < N; i++) {
|
||||
const u32 bit_width = AmdGpu::ComponentBits(format, i);
|
||||
const u32 bit_offset = AmdGpu::ComponentOffset(format, i) % 32;
|
||||
|
||||
const Id comp{ConvertF32ToFormat(
|
||||
ctx, N == 1 ? value : ctx.OpCompositeExtract(ctx.F32[1], value, i), num_format,
|
||||
bit_width)};
|
||||
|
||||
if (bit_width == 32) {
|
||||
if constexpr (N == 1) {
|
||||
ctx.OpStore(ptr, comp);
|
||||
} else {
|
||||
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
|
||||
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id,
|
||||
ctx.u32_zero_value, index_i);
|
||||
ctx.OpStore(ptr, comp);
|
||||
}
|
||||
} else {
|
||||
if (i == 0) {
|
||||
packed_value = comp;
|
||||
} else {
|
||||
packed_value =
|
||||
ctx.OpBitFieldInsert(ctx.U32[1], packed_value, comp,
|
||||
ctx.ConstU32(bit_offset), ctx.ConstU32(bit_width));
|
||||
}
|
||||
|
||||
if (i == N - 1) {
|
||||
ctx.OpStore(ptr, packed_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
|
||||
}
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
|
||||
Id value) {
|
||||
EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
|
||||
Id value) {
|
||||
EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
|
||||
Id value) {
|
||||
EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -17,86 +17,133 @@ struct ImageOperands {
|
|||
operands.push_back(value);
|
||||
}
|
||||
|
||||
void AddOffset(EmitContext& ctx, const IR::Value& offset,
|
||||
bool can_use_runtime_offsets = false) {
|
||||
if (offset.IsEmpty()) {
|
||||
return;
|
||||
}
|
||||
if (offset.IsImmediate()) {
|
||||
const s32 operand = offset.U32();
|
||||
Add(spv::ImageOperandsMask::ConstOffset, ctx.ConstS32(operand));
|
||||
return;
|
||||
}
|
||||
IR::Inst* const inst{offset.InstRecursive()};
|
||||
if (inst->AreAllArgsImmediates()) {
|
||||
switch (inst->GetOpcode()) {
|
||||
case IR::Opcode::CompositeConstructU32x2:
|
||||
Add(spv::ImageOperandsMask::ConstOffset,
|
||||
ctx.ConstS32(static_cast<s32>(inst->Arg(0).U32()),
|
||||
static_cast<s32>(inst->Arg(1).U32())));
|
||||
return;
|
||||
case IR::Opcode::CompositeConstructU32x3:
|
||||
Add(spv::ImageOperandsMask::ConstOffset,
|
||||
ctx.ConstS32(static_cast<s32>(inst->Arg(0).U32()),
|
||||
static_cast<s32>(inst->Arg(1).U32()),
|
||||
static_cast<s32>(inst->Arg(2).U32())));
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (can_use_runtime_offsets) {
|
||||
Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Runtime offset provided to unsupported image sample instruction");
|
||||
}
|
||||
}
|
||||
|
||||
spv::ImageOperandsMask mask{};
|
||||
boost::container::static_vector<Id, 4> operands;
|
||||
};
|
||||
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset) {
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
|
||||
const IR::Value& offset) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
ImageOperands operands;
|
||||
operands.Add(spv::ImageOperandsMask::Offset, offset);
|
||||
operands.Add(spv::ImageOperandsMask::Bias, bias);
|
||||
operands.AddOffset(ctx, offset);
|
||||
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset) {
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
||||
const IR::Value& offset) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords,
|
||||
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
|
||||
ImageOperands operands;
|
||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||
operands.AddOffset(ctx, offset);
|
||||
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||
Id bias_lc, const IR::Value& offset) {
|
||||
Id bias, const IR::Value& offset) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref);
|
||||
ImageOperands operands;
|
||||
operands.Add(spv::ImageOperandsMask::Bias, bias);
|
||||
operands.AddOffset(ctx, offset);
|
||||
return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||
Id bias_lc, Id offset) {
|
||||
Id lod, const IR::Value& offset) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref,
|
||||
spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f));
|
||||
ImageOperands operands;
|
||||
operands.AddOffset(ctx, offset);
|
||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||
return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) {
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
|
||||
ImageOperands operands;
|
||||
operands.Add(spv::ImageOperandsMask::Offset, offset);
|
||||
operands.AddOffset(ctx, offset);
|
||||
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset,
|
||||
Id offset2, Id dref) {
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset, Id dref) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||
ImageOperands operands;
|
||||
operands.Add(spv::ImageOperandsMask::Offset, offset);
|
||||
operands.AddOffset(ctx, offset);
|
||||
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask,
|
||||
operands.operands);
|
||||
}
|
||||
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
||||
Id ms) {
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
|
||||
Id lod, Id ms) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const Id result_type = texture.data_types->Get(4);
|
||||
if (Sirit::ValidId(lod)) {
|
||||
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords,
|
||||
spv::ImageOperandsMask::Lod, lod));
|
||||
} else {
|
||||
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords));
|
||||
}
|
||||
ImageOperands operands;
|
||||
operands.AddOffset(ctx, offset);
|
||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||
return ctx.OpBitcast(
|
||||
ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands));
|
||||
}
|
||||
|
||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) {
|
||||
|
|
|
@ -76,6 +76,10 @@ void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
|
|||
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
|
@ -93,15 +97,9 @@ Id EmitUndefU8(EmitContext& ctx);
|
|||
Id EmitUndefU16(EmitContext& ctx);
|
||||
Id EmitUndefU32(EmitContext& ctx);
|
||||
Id EmitUndefU64(EmitContext& ctx);
|
||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
|
||||
Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
|
||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
|
||||
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
|
||||
|
@ -357,19 +355,20 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
|||
Id EmitConvertU16U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertU32U16(EmitContext& ctx, Id value);
|
||||
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset);
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||
Id offset);
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
|
||||
const IR::Value& offset);
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
||||
const IR::Value& offset);
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||
Id bias_lc, const IR::Value& offset);
|
||||
Id bias, const IR::Value& offset);
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||
Id bias_lc, Id offset);
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2);
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset,
|
||||
Id offset2, Id dref);
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
|
||||
Id ms);
|
||||
Id lod, const IR::Value& offset);
|
||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset);
|
||||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
const IR::Value& offset, Id dref);
|
||||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset,
|
||||
Id lod, Id ms);
|
||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
|
|
|
@ -5,99 +5,25 @@
|
|||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
|
||||
const Id shift_id{ctx.ConstU32(shift)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
|
||||
}
|
||||
|
||||
Id Word(EmitContext& ctx, Id offset) {
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
|
||||
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
|
||||
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))};
|
||||
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))};
|
||||
const Id count_id{ctx.ConstU32(count)};
|
||||
return {bit, count_id};
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
} else {
|
||||
return Word(ctx, offset);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
return ctx.OpLoad(ctx.U32[2], pointer);
|
||||
} else {
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
}
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
return ctx.OpLoad(ctx.U32[4], pointer);
|
||||
}
|
||||
const Id shift_id{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
std::array<Id, 4> values{};
|
||||
|
@ -109,35 +35,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
|
|||
return ctx.OpCompositeConstruct(ctx.U32[4], values);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||
Id pointer{};
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
|
||||
} else {
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
||||
}
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
|
||||
|
@ -148,11 +53,6 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
|||
}
|
||||
|
||||
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.ConstU32(2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
for (u32 i = 0; i < 4; ++i) {
|
||||
|
|
|
@ -6,7 +6,9 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
void EmitPrologue(EmitContext& ctx) {}
|
||||
void EmitPrologue(EmitContext& ctx) {
|
||||
ctx.DefineBufferOffsets();
|
||||
}
|
||||
|
||||
void EmitEpilogue(EmitContext& ctx) {}
|
||||
|
||||
|
|
|
@ -165,14 +165,18 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
throw InvalidArgument("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
Id EmitContext::GetBufferOffset(u32 binding) {
|
||||
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
|
||||
const u32 comp = (binding & 0xf) >> 2;
|
||||
const u32 offset = (binding & 0x3) << 3;
|
||||
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
|
||||
push_data_block, ConstU32(half), ConstU32(comp))};
|
||||
const Id value{OpLoad(U32[1], ptr)};
|
||||
return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
|
||||
void EmitContext::DefineBufferOffsets() {
|
||||
for (auto& buffer : buffers) {
|
||||
const u32 binding = buffer.binding;
|
||||
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
|
||||
const u32 comp = (binding & 0xf) >> 2;
|
||||
const u32 offset = (binding & 0x3) << 3;
|
||||
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
|
||||
push_data_block, ConstU32(half), ConstU32(comp))};
|
||||
const Id value{OpLoad(U32[1], ptr)};
|
||||
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
|
||||
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
|
||||
}
|
||||
}
|
||||
|
||||
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
||||
|
@ -327,7 +331,9 @@ void EmitContext::DefineBuffers() {
|
|||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
const Id data_type = (*data_types)[1];
|
||||
const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))};
|
||||
const Id record_array_type{buffer.is_storage
|
||||
? TypeRuntimeArray(data_type)
|
||||
: TypeArray(data_type, ConstU32(buffer.length))};
|
||||
const Id struct_type{TypeStruct(record_array_type)};
|
||||
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
|
||||
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||
|
@ -354,7 +360,7 @@ void EmitContext::DefineBuffers() {
|
|||
|
||||
buffers.push_back({
|
||||
.id = id,
|
||||
.global_binding = binding++,
|
||||
.binding = binding++,
|
||||
.data_types = data_types,
|
||||
.pointer_type = pointer_type,
|
||||
.buffer = buffer.GetVsharp(info),
|
||||
|
@ -401,6 +407,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||
return spv::ImageFormat::Rgba16f;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
|
||||
return spv::ImageFormat::Rgba16;
|
||||
}
|
||||
if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 &&
|
||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
|
||||
return spv::ImageFormat::R8;
|
||||
|
@ -507,43 +517,9 @@ void EmitContext::DefineSharedMemory() {
|
|||
if (info.shared_memory_size == 0) {
|
||||
info.shared_memory_size = DefaultSharedMemSize;
|
||||
}
|
||||
const auto make{[&](Id element_type, u32 element_size) {
|
||||
const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)};
|
||||
const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
|
||||
Decorate(array_type, spv::Decoration::ArrayStride, element_size);
|
||||
|
||||
const Id struct_type{TypeStruct(array_type)};
|
||||
MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
|
||||
const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
|
||||
const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
|
||||
const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
|
||||
Decorate(variable, spv::Decoration::Aliased);
|
||||
interfaces.push_back(variable);
|
||||
|
||||
return std::make_tuple(variable, element_pointer, pointer);
|
||||
}};
|
||||
if (profile.support_explicit_workgroup_layout) {
|
||||
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
|
||||
if (info.uses_shared_u8) {
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
|
||||
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
|
||||
}
|
||||
if (info.uses_shared_u16) {
|
||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
|
||||
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
|
||||
}
|
||||
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
|
||||
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
|
||||
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
|
||||
return;
|
||||
}
|
||||
const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)};
|
||||
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
|
||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
||||
|
||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
||||
interfaces.push_back(shared_memory_u32);
|
||||
|
|
|
@ -40,7 +40,7 @@ public:
|
|||
~EmitContext();
|
||||
|
||||
Id Def(const IR::Value& value);
|
||||
Id GetBufferOffset(u32 binding);
|
||||
void DefineBufferOffsets();
|
||||
|
||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
||||
|
@ -203,7 +203,8 @@ public:
|
|||
struct BufferDefinition {
|
||||
Id id;
|
||||
Id offset;
|
||||
u32 global_binding;
|
||||
Id offset_dwords;
|
||||
u32 binding;
|
||||
const VectorIds* data_types;
|
||||
Id pointer_type;
|
||||
AmdGpu::Buffer buffer;
|
||||
|
|
|
@ -73,101 +73,190 @@ void Translator::EmitPrologue() {
|
|||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||
IR::U32F32 value{};
|
||||
template <typename T>
|
||||
T Translator::GetSrc(const InstOperand& operand) {
|
||||
constexpr bool is_float = std::is_same_v<T, IR::F32>;
|
||||
|
||||
const bool is_float = operand.type == ScalarType::Float32 || force_flt;
|
||||
const auto get_imm = [&](auto value) -> T {
|
||||
if constexpr (is_float) {
|
||||
return ir.Imm32(std::bit_cast<float>(value));
|
||||
} else {
|
||||
return ir.Imm32(std::bit_cast<u32>(value));
|
||||
}
|
||||
};
|
||||
|
||||
T value{};
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
if (is_float) {
|
||||
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
} else {
|
||||
value = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
}
|
||||
value = ir.GetScalarReg<T>(IR::ScalarReg(operand.code));
|
||||
break;
|
||||
case OperandField::VectorGPR:
|
||||
if (is_float) {
|
||||
value = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
} else {
|
||||
value = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
}
|
||||
value = ir.GetVectorReg<T>(IR::VectorReg(operand.code));
|
||||
break;
|
||||
case OperandField::ConstZero:
|
||||
if (is_float) {
|
||||
value = ir.Imm32(0.f);
|
||||
} else {
|
||||
value = ir.Imm32(0U);
|
||||
}
|
||||
value = get_imm(0U);
|
||||
break;
|
||||
case OperandField::SignedConstIntPos:
|
||||
ASSERT(!force_flt);
|
||||
value = ir.Imm32(operand.code - SignedConstIntPosMin + 1);
|
||||
value = get_imm(operand.code - SignedConstIntPosMin + 1);
|
||||
break;
|
||||
case OperandField::SignedConstIntNeg:
|
||||
ASSERT(!force_flt);
|
||||
value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
|
||||
value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1);
|
||||
break;
|
||||
case OperandField::LiteralConst:
|
||||
if (is_float) {
|
||||
value = ir.Imm32(std::bit_cast<float>(operand.code));
|
||||
} else {
|
||||
value = ir.Imm32(operand.code);
|
||||
}
|
||||
value = get_imm(operand.code);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
if (is_float) {
|
||||
value = ir.Imm32(1.f);
|
||||
} else {
|
||||
value = ir.Imm32(std::bit_cast<u32>(1.f));
|
||||
}
|
||||
value = get_imm(1.f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_0_5:
|
||||
value = ir.Imm32(0.5f);
|
||||
value = get_imm(0.5f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_2_0:
|
||||
value = ir.Imm32(2.0f);
|
||||
value = get_imm(2.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_4_0:
|
||||
value = ir.Imm32(4.0f);
|
||||
value = get_imm(4.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_0_5:
|
||||
value = ir.Imm32(-0.5f);
|
||||
value = get_imm(-0.5f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_1_0:
|
||||
if (is_float) {
|
||||
value = ir.Imm32(-1.0f);
|
||||
} else {
|
||||
value = ir.Imm32(std::bit_cast<u32>(-1.0f));
|
||||
}
|
||||
value = get_imm(-1.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_2_0:
|
||||
value = ir.Imm32(-2.0f);
|
||||
value = get_imm(-2.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_4_0:
|
||||
value = ir.Imm32(-4.0f);
|
||||
value = get_imm(-4.0f);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
if (force_flt) {
|
||||
if constexpr (is_float) {
|
||||
value = ir.BitCast<IR::F32>(ir.GetVccLo());
|
||||
} else {
|
||||
value = ir.GetVccLo();
|
||||
}
|
||||
break;
|
||||
case OperandField::VccHi:
|
||||
if (force_flt) {
|
||||
if constexpr (is_float) {
|
||||
value = ir.BitCast<IR::F32>(ir.GetVccHi());
|
||||
} else {
|
||||
value = ir.GetVccHi();
|
||||
}
|
||||
break;
|
||||
case OperandField::M0:
|
||||
return m0_value;
|
||||
if constexpr (is_float) {
|
||||
UNREACHABLE();
|
||||
} else {
|
||||
return m0_value;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
if (is_float) {
|
||||
if constexpr (is_float) {
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(value);
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
value = ir.FPNeg(value);
|
||||
}
|
||||
} else {
|
||||
if (operand.input_modifier.abs) {
|
||||
LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction");
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
template IR::U32 Translator::GetSrc<IR::U32>(const InstOperand&);
|
||||
template IR::F32 Translator::GetSrc<IR::F32>(const InstOperand&);
|
||||
|
||||
template <typename T>
|
||||
T Translator::GetSrc64(const InstOperand& operand) {
|
||||
constexpr bool is_float = std::is_same_v<T, IR::F64>;
|
||||
|
||||
const auto get_imm = [&](auto value) -> T {
|
||||
if constexpr (is_float) {
|
||||
return ir.Imm64(std::bit_cast<double>(value));
|
||||
} else {
|
||||
return ir.Imm64(std::bit_cast<u64>(value));
|
||||
}
|
||||
};
|
||||
|
||||
T value{};
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR: {
|
||||
const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code));
|
||||
const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1));
|
||||
if constexpr (is_float) {
|
||||
UNREACHABLE();
|
||||
} else {
|
||||
value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OperandField::VectorGPR: {
|
||||
const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code));
|
||||
const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1));
|
||||
if constexpr (is_float) {
|
||||
UNREACHABLE();
|
||||
} else {
|
||||
value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OperandField::ConstZero:
|
||||
value = get_imm(0ULL);
|
||||
break;
|
||||
case OperandField::SignedConstIntPos:
|
||||
value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1);
|
||||
break;
|
||||
case OperandField::SignedConstIntNeg:
|
||||
value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1);
|
||||
break;
|
||||
case OperandField::LiteralConst:
|
||||
value = get_imm(u64(operand.code));
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
value = get_imm(1.0);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_0_5:
|
||||
value = get_imm(0.5);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_2_0:
|
||||
value = get_imm(2.0);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_4_0:
|
||||
value = get_imm(4.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_0_5:
|
||||
value = get_imm(-0.5);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_1_0:
|
||||
value = get_imm(-1.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_2_0:
|
||||
value = get_imm(-2.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_4_0:
|
||||
value = get_imm(-4.0);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
if constexpr (is_float) {
|
||||
UNREACHABLE();
|
||||
} else {
|
||||
value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi()));
|
||||
}
|
||||
break;
|
||||
case OperandField::VccHi:
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
if constexpr (is_float) {
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(value);
|
||||
}
|
||||
|
@ -178,148 +267,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
|||
return value;
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||
return GetSrc<IR::U32F32>(operand, force_flt);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::F32 Translator::GetSrc(const InstOperand& operand, bool) {
|
||||
return GetSrc<IR::U32F32>(operand, true);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
|
||||
IR::Value value_hi{};
|
||||
IR::Value value_lo{};
|
||||
|
||||
bool immediate = false;
|
||||
const bool is_float = operand.type == ScalarType::Float64 || force_flt;
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
if (is_float) {
|
||||
value_lo = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
value_hi = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code + 1));
|
||||
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
|
||||
value_lo = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
value_hi = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code + 1));
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
case OperandField::VectorGPR:
|
||||
if (is_float) {
|
||||
value_lo = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
value_hi = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code + 1));
|
||||
} else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
|
||||
value_lo = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
value_hi = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code + 1));
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstZero:
|
||||
immediate = true;
|
||||
if (force_flt) {
|
||||
value_lo = ir.Imm64(0.0);
|
||||
} else {
|
||||
value_lo = ir.Imm64(u64(0U));
|
||||
}
|
||||
break;
|
||||
case OperandField::SignedConstIntPos:
|
||||
ASSERT(!force_flt);
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1);
|
||||
break;
|
||||
case OperandField::SignedConstIntNeg:
|
||||
ASSERT(!force_flt);
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1);
|
||||
break;
|
||||
case OperandField::LiteralConst:
|
||||
immediate = true;
|
||||
if (force_flt) {
|
||||
UNREACHABLE(); // There is a literal double?
|
||||
} else {
|
||||
value_lo = ir.Imm64(u64(operand.code));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
immediate = true;
|
||||
if (force_flt) {
|
||||
value_lo = ir.Imm64(1.0);
|
||||
} else {
|
||||
value_lo = ir.Imm64(std::bit_cast<u64>(f64(1.0)));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_0_5:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(0.5);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_2_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(2.0);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_4_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(4.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_0_5:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-0.5);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_1_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-1.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_2_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-2.0);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_4_0:
|
||||
immediate = true;
|
||||
value_lo = ir.Imm64(-4.0);
|
||||
break;
|
||||
case OperandField::VccLo: {
|
||||
value_lo = ir.GetVccLo();
|
||||
value_hi = ir.GetVccHi();
|
||||
} break;
|
||||
case OperandField::VccHi:
|
||||
UNREACHABLE();
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
IR::Value value;
|
||||
|
||||
if (immediate) {
|
||||
value = value_lo;
|
||||
} else if (is_float) {
|
||||
throw NotImplementedException("required OpPackDouble2x32 implementation");
|
||||
} else {
|
||||
IR::Value packed = ir.CompositeConstruct(value_lo, value_hi);
|
||||
value = ir.PackUint2x32(packed);
|
||||
}
|
||||
|
||||
if (is_float) {
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(IR::F32F64(value));
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
value = ir.FPNeg(IR::F32F64(value));
|
||||
}
|
||||
}
|
||||
return IR::U64F64(value);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
|
||||
return GetSrc64<IR::U64F64>(operand, force_flt);
|
||||
}
|
||||
template <>
|
||||
IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) {
|
||||
return GetSrc64<IR::U64F64>(operand, true);
|
||||
}
|
||||
template IR::U64 Translator::GetSrc64<IR::U64>(const InstOperand&);
|
||||
template IR::F64 Translator::GetSrc64<IR::F64>(const InstOperand&);
|
||||
|
||||
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
||||
IR::U32F32 result = value;
|
||||
|
|
|
@ -186,7 +186,7 @@ public:
|
|||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
|
||||
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
|
@ -211,10 +211,10 @@ public:
|
|||
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||
|
||||
private:
|
||||
template <typename T = IR::U32F32>
|
||||
[[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false);
|
||||
template <typename T = IR::U64F64>
|
||||
[[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false);
|
||||
template <typename T = IR::U32>
|
||||
[[nodiscard]] T GetSrc(const InstOperand& operand);
|
||||
template <typename T = IR::U64>
|
||||
[[nodiscard]] T GetSrc64(const InstOperand& operand);
|
||||
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
|
@ -312,7 +311,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MOV(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
|
||||
}
|
||||
|
||||
void Translator::V_SAD(const GcnInst& inst) {
|
||||
|
@ -321,14 +320,14 @@ void Translator::V_SAD(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MAC_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true),
|
||||
GetSrc(inst.dst[0], true)));
|
||||
SetDst(inst.dst[0], ir.FPFma(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]),
|
||||
GetSrc<IR::F32>(inst.dst[0])));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value vec_f32 =
|
||||
ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true));
|
||||
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
|
||||
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
|
||||
}
|
||||
|
||||
|
@ -339,13 +338,13 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_CVT_F16_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0 = GetSrc(inst.src[0], true);
|
||||
const IR::F32 src0 = GetSrc<IR::F32>(inst.src[0]);
|
||||
const IR::F16 src0fp16 = ir.FPConvert(16, src0);
|
||||
SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast<IR::U16>(src0fp16)));
|
||||
}
|
||||
|
||||
void Translator::V_MUL_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
|
||||
SetDst(inst.dst[0], ir.FPMul(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1])));
|
||||
}
|
||||
|
||||
void Translator::V_CNDMASK_B32(const GcnInst& inst) {
|
||||
|
@ -354,24 +353,8 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
|
|||
const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR
|
||||
? ir.GetThreadBitScalarReg(flag_reg)
|
||||
: ir.GetVcc();
|
||||
|
||||
// We can treat the instruction as integer most of the time, but when a source is
|
||||
// a floating point constant we will force the other as float for better readability
|
||||
// The other operand is also higly likely to be float as well.
|
||||
const auto is_float_const = [](OperandField field) {
|
||||
return field >= OperandField::ConstFloatPos_0_5 && field <= OperandField::ConstFloatNeg_4_0;
|
||||
};
|
||||
const bool has_flt_source =
|
||||
is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
|
||||
IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
|
||||
IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
|
||||
if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
|
||||
src1 = ir.BitCast<IR::F32, IR::U32>(src1);
|
||||
}
|
||||
if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
|
||||
src0 = ir.BitCast<IR::F32, IR::U32>(src0);
|
||||
}
|
||||
const IR::Value result = ir.Select(flag, src1, src0);
|
||||
const IR::Value result =
|
||||
ir.Select(flag, GetSrc<IR::F32>(inst.src[1]), GetSrc<IR::F32>(inst.src[0]));
|
||||
ir.SetVectorReg(dst_reg, IR::U32F32{result});
|
||||
}
|
||||
|
||||
|
@ -448,21 +431,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MAD_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.Fract(src0));
|
||||
}
|
||||
|
||||
void Translator::V_ADD_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPAdd(src0, src1));
|
||||
}
|
||||
|
||||
|
@ -476,9 +459,9 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MED3_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
|
||||
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
|
||||
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
||||
}
|
||||
|
@ -492,32 +475,32 @@ void Translator::V_MED3_I32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
|
||||
}
|
||||
|
||||
void Translator::V_SUB_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPSub(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_RCP_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPRecip(src0));
|
||||
}
|
||||
|
||||
void Translator::V_FMA_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
const IR::U1 result = [&] {
|
||||
switch (op) {
|
||||
case ConditionOp::F:
|
||||
|
@ -557,8 +540,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy));
|
||||
}
|
||||
|
||||
|
@ -569,40 +552,40 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_RSQ_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
|
||||
}
|
||||
|
||||
void Translator::V_SIN_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPSin(src0));
|
||||
}
|
||||
|
||||
void Translator::V_LOG_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPLog2(src0));
|
||||
}
|
||||
|
||||
void Translator::V_EXP_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPExp2(src0));
|
||||
}
|
||||
|
||||
void Translator::V_SQRT_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPSqrt(src0));
|
||||
}
|
||||
|
||||
void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy));
|
||||
}
|
||||
|
||||
void Translator::V_MIN3_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
|
||||
}
|
||||
|
||||
|
@ -614,9 +597,9 @@ void Translator::V_MIN3_I32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MADMK_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 k{GetSrc(inst.src[2], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
const IR::F32 k{GetSrc<IR::F32>(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.FPFma(src0, k, src1));
|
||||
}
|
||||
|
||||
|
@ -625,25 +608,25 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_CUBESC_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[0], true));
|
||||
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
|
||||
}
|
||||
|
||||
void Translator::V_CUBETC_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[1], true));
|
||||
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1]));
|
||||
}
|
||||
|
||||
void Translator::V_CUBEID_F32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], GetSrc(inst.src[2], true));
|
||||
SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2]));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_U32_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.ConvertFToU(32, src0));
|
||||
}
|
||||
|
||||
void Translator::V_SUBREV_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPSub(src1, src0));
|
||||
}
|
||||
|
||||
|
@ -727,9 +710,17 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
|
|||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||
const IR::U32 max{ir.IMax(src0, src1, false)};
|
||||
const IR::U32 min{ir.IMin(src0, src1, false)};
|
||||
SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2));
|
||||
IR::U32 result;
|
||||
if (src0.IsImmediate() && src0.U32() == 0U) {
|
||||
result = src1;
|
||||
} else if (src1.IsImmediate() && src1.U32() == 0U) {
|
||||
result = src0;
|
||||
} else {
|
||||
const IR::U32 max{ir.IMax(src0, src1, false)};
|
||||
const IR::U32 min{ir.IMin(src0, src1, false)};
|
||||
result = ir.ISub(max, min);
|
||||
}
|
||||
SetDst(inst.dst[0], ir.IAdd(result, src2));
|
||||
}
|
||||
|
||||
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
|
||||
|
@ -783,7 +774,7 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_RNDNE_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPRoundEven(src0));
|
||||
}
|
||||
|
||||
|
@ -794,14 +785,14 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_COS_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPCos(src0));
|
||||
}
|
||||
|
||||
void Translator::V_MAX3_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
|
||||
}
|
||||
|
||||
|
@ -813,7 +804,7 @@ void Translator::V_MAX3_U32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
|
||||
}
|
||||
|
||||
|
@ -830,12 +821,12 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_TRUNC_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPTrunc(src0));
|
||||
}
|
||||
|
||||
void Translator::V_CEIL_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPCeil(src0));
|
||||
}
|
||||
|
||||
|
@ -899,18 +890,18 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_LDEXP_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
|
||||
}
|
||||
|
||||
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
|
||||
const IR::F32F64 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
IR::U1 value;
|
||||
if (src1.IsImmediate()) {
|
||||
|
|
|
@ -53,6 +53,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
case Opcode::IMAGE_GET_RESINFO:
|
||||
return IMAGE_GET_RESINFO(inst);
|
||||
|
||||
// Buffer load operations
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_X:
|
||||
return BUFFER_LOAD_FORMAT(1, true, true, inst);
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XY:
|
||||
|
@ -61,6 +62,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return BUFFER_LOAD_FORMAT(3, true, true, inst);
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
|
||||
return BUFFER_LOAD_FORMAT(4, true, true, inst);
|
||||
|
||||
case Opcode::BUFFER_LOAD_FORMAT_X:
|
||||
return BUFFER_LOAD_FORMAT(1, false, true, inst);
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XY:
|
||||
|
@ -69,6 +71,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return BUFFER_LOAD_FORMAT(3, false, true, inst);
|
||||
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
|
||||
return BUFFER_LOAD_FORMAT(4, false, true, inst);
|
||||
|
||||
case Opcode::BUFFER_LOAD_DWORD:
|
||||
return BUFFER_LOAD_FORMAT(1, false, false, inst);
|
||||
case Opcode::BUFFER_LOAD_DWORDX2:
|
||||
|
@ -77,16 +80,25 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return BUFFER_LOAD_FORMAT(3, false, false, inst);
|
||||
case Opcode::BUFFER_LOAD_DWORDX4:
|
||||
return BUFFER_LOAD_FORMAT(4, false, false, inst);
|
||||
|
||||
// Buffer store operations
|
||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
case Opcode::BUFFER_STORE_DWORD:
|
||||
return BUFFER_STORE_FORMAT(1, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX2:
|
||||
return BUFFER_STORE_FORMAT(2, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX3:
|
||||
return BUFFER_STORE_FORMAT(3, false, inst);
|
||||
return BUFFER_STORE_FORMAT(1, false, true, inst);
|
||||
case Opcode::BUFFER_STORE_FORMAT_XY:
|
||||
return BUFFER_STORE_FORMAT(2, false, true, inst);
|
||||
case Opcode::BUFFER_STORE_FORMAT_XYZ:
|
||||
return BUFFER_STORE_FORMAT(3, false, true, inst);
|
||||
case Opcode::BUFFER_STORE_FORMAT_XYZW:
|
||||
return BUFFER_STORE_FORMAT(4, false, true, inst);
|
||||
|
||||
case Opcode::BUFFER_STORE_DWORD:
|
||||
return BUFFER_STORE_FORMAT(1, false, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX2:
|
||||
return BUFFER_STORE_FORMAT(2, false, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX3:
|
||||
return BUFFER_STORE_FORMAT(3, false, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX4:
|
||||
return BUFFER_STORE_FORMAT(4, false, inst);
|
||||
return BUFFER_STORE_FORMAT(4, false, false, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -135,8 +147,8 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
|
||||
// Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
|
||||
// Set Architecture
|
||||
const IR::Value offset =
|
||||
flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{};
|
||||
const IR::U32 offset =
|
||||
flags.test(MimgModifier::Offset) ? ir.GetVectorReg<IR::U32>(addr_reg++) : IR::U32{};
|
||||
const IR::F32 bias =
|
||||
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||
const IR::F32 dref =
|
||||
|
@ -168,18 +180,17 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
|
||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||
const IR::Value texel = [&]() -> IR::Value {
|
||||
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
|
||||
if (!flags.test(MimgModifier::Pcf)) {
|
||||
if (explicit_lod) {
|
||||
return ir.ImageSampleExplicitLod(handle, body, lod, offset, info);
|
||||
return ir.ImageSampleExplicitLod(handle, body, offset, info);
|
||||
} else {
|
||||
return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info);
|
||||
return ir.ImageSampleImplicitLod(handle, body, bias, offset, info);
|
||||
}
|
||||
}
|
||||
if (explicit_lod) {
|
||||
return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info);
|
||||
return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info);
|
||||
}
|
||||
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info);
|
||||
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info);
|
||||
}();
|
||||
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
|
@ -251,10 +262,10 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
|||
const IR::Value texel = [&]() -> IR::Value {
|
||||
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
|
||||
if (!flags.test(MimgModifier::Pcf)) {
|
||||
return ir.ImageGather(handle, body, offset, {}, info);
|
||||
return ir.ImageGather(handle, body, offset, info);
|
||||
}
|
||||
ASSERT(mimg.dmask & 1); // should be always 1st (R) component
|
||||
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
|
||||
return ir.ImageGatherDref(handle, body, offset, dref, info);
|
||||
}();
|
||||
|
||||
// For gather4 instructions dmask selects which component to read and must have
|
||||
|
@ -360,7 +371,8 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format,
|
||||
const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
|
@ -411,7 +423,11 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
|
|||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
|
||||
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, info);
|
||||
if (is_format) {
|
||||
ir.StoreBufferFormat(num_dwords, handle, address, value, info);
|
||||
} else {
|
||||
ir.StoreBuffer(num_dwords, handle, address, value, info);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
||||
|
|
|
@ -16,18 +16,6 @@ namespace {
|
|||
UNREACHABLE_MSG("Invalid type = {}, functionName = {}, line = {}", u32(type), functionName,
|
||||
lineNumber);
|
||||
}
|
||||
|
||||
Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
|
||||
if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
|
||||
return ir.CompositeConstruct(bias_lod, lod_clamp);
|
||||
} else if (!bias_lod.IsEmpty()) {
|
||||
return bias_lod;
|
||||
} else if (!lod_clamp.IsEmpty()) {
|
||||
return lod_clamp;
|
||||
} else {
|
||||
return Value{};
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
U1 IREmitter::Imm1(bool value) const {
|
||||
|
@ -271,10 +259,6 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
|
|||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
|
||||
case 16:
|
||||
return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
|
||||
case 32:
|
||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
||||
case 64:
|
||||
|
@ -288,12 +272,6 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
|||
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
Inst(Opcode::WriteSharedU8, offset, value);
|
||||
break;
|
||||
case 16:
|
||||
Inst(Opcode::WriteSharedU16, offset, value);
|
||||
break;
|
||||
case 32:
|
||||
Inst(Opcode::WriteSharedU32, offset, value);
|
||||
break;
|
||||
|
@ -369,6 +347,26 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
|
|||
}
|
||||
}
|
||||
|
||||
void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
|
||||
const Value& data, BufferInstInfo info) {
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 2:
|
||||
Inst(Opcode::StoreBufferFormatF32x2, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 3:
|
||||
Inst(Opcode::StoreBufferFormatF32x3, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 4:
|
||||
Inst(Opcode::StoreBufferFormatF32x4, Flags{info}, handle, address, data);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::LaneId() {
|
||||
return Inst<U32>(Opcode::LaneId);
|
||||
}
|
||||
|
@ -1386,41 +1384,37 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
|
|||
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias,
|
||||
const U32& offset, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset,
|
||||
TextureInstInfo info) {
|
||||
const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
|
||||
return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias_lc, offset);
|
||||
return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
|
||||
const Value& offset, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset);
|
||||
}
|
||||
|
||||
F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
|
||||
const F32& bias, const Value& offset,
|
||||
const F32& lod_clamp, TextureInstInfo info) {
|
||||
const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
|
||||
return Inst<F32>(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias_lc,
|
||||
F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref,
|
||||
const F32& bias, const U32& offset,
|
||||
TextureInstInfo info) {
|
||||
return Inst<F32>(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias,
|
||||
offset);
|
||||
}
|
||||
|
||||
F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
|
||||
const F32& lod, const Value& offset,
|
||||
TextureInstInfo info) {
|
||||
return Inst<F32>(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod,
|
||||
F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref,
|
||||
const U32& offset, TextureInstInfo info) {
|
||||
return Inst<F32>(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{},
|
||||
offset);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
|
||||
const Value& offset2, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset, offset2);
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
|
||||
const Value& offset2, const F32& dref, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, offset2, dref);
|
||||
const F32& dref, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||
|
|
|
@ -93,6 +93,8 @@ public:
|
|||
BufferInstInfo info);
|
||||
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
|
||||
BufferInstInfo info);
|
||||
void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
|
||||
const Value& data, BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U32 LaneId();
|
||||
[[nodiscard]] U32 WarpId();
|
||||
|
@ -241,31 +243,32 @@ public:
|
|||
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& bias, const Value& offset,
|
||||
const F32& lod_clamp, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& lod, const Value& offset,
|
||||
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
|
||||
const F32& bias, const U32& offset,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
|
||||
|
||||
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body,
|
||||
const U32& offset, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body,
|
||||
const F32& dref, const F32& bias,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
const U32& offset, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body,
|
||||
const F32& dref, const U32& offset,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& dref, const F32& lod,
|
||||
const Value& offset, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||
const IR::U1& skip_mips);
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||
const IR::U1& skip_mips, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
||||
const U1& skip_mips);
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
||||
const U1& skip_mips, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
|
||||
const Value& offset2, TextureInstInfo info);
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
|
||||
const Value& offset, const Value& offset2, const F32& dref,
|
||||
TextureInstInfo info);
|
||||
const Value& offset, const F32& dref, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||
const U32& lod, const U32& multisampling, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
||||
|
|
|
@ -55,12 +55,14 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::StoreBufferF32x2:
|
||||
case Opcode::StoreBufferF32x3:
|
||||
case Opcode::StoreBufferF32x4:
|
||||
case Opcode::StoreBufferFormatF32:
|
||||
case Opcode::StoreBufferFormatF32x2:
|
||||
case Opcode::StoreBufferFormatF32x3:
|
||||
case Opcode::StoreBufferFormatF32x4:
|
||||
case Opcode::StoreBufferU32:
|
||||
case Opcode::WriteSharedU128:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::WriteSharedU32:
|
||||
case Opcode::WriteSharedU16:
|
||||
case Opcode::WriteSharedU8:
|
||||
case Opcode::ImageWrite:
|
||||
case Opcode::ImageAtomicIAdd32:
|
||||
case Opcode::ImageAtomicSMin32:
|
||||
|
|
|
@ -26,15 +26,9 @@ OPCODE(WorkgroupMemoryBarrier, Void,
|
|||
OPCODE(DeviceMemoryBarrier, Void, )
|
||||
|
||||
// Shared memory operations
|
||||
OPCODE(LoadSharedU8, U32, U32, )
|
||||
OPCODE(LoadSharedS8, U32, U32, )
|
||||
OPCODE(LoadSharedU16, U32, U32, )
|
||||
OPCODE(LoadSharedS16, U32, U32, )
|
||||
OPCODE(LoadSharedU32, U32, U32, )
|
||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
||||
OPCODE(LoadSharedU128, U32x4, U32, )
|
||||
OPCODE(WriteSharedU8, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU16, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU32, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU64, Void, U32, U32x2, )
|
||||
OPCODE(WriteSharedU128, Void, U32, U32x4, )
|
||||
|
@ -88,6 +82,10 @@ OPCODE(StoreBufferF32, Void, Opaq
|
|||
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
|
||||
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
|
||||
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
|
||||
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32, )
|
||||
OPCODE(StoreBufferFormatF32x2, Void, Opaque, Opaque, F32x2, )
|
||||
OPCODE(StoreBufferFormatF32x3, Void, Opaque, Opaque, F32x3, )
|
||||
OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, )
|
||||
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
|
||||
|
||||
// Vector utility
|
||||
|
@ -298,12 +296,12 @@ OPCODE(ConvertU16U32, U16, U32,
|
|||
OPCODE(ConvertU32U16, U32, U16, )
|
||||
|
||||
// Image operations
|
||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
|
||||
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
|
||||
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, )
|
||||
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, )
|
||||
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, )
|
||||
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
|
||||
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
||||
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
||||
|
|
|
@ -14,5 +14,6 @@ void DeadCodeEliminationPass(IR::Program& program);
|
|||
void ConstantPropagationPass(IR::BlockList& program);
|
||||
void ResourceTrackingPass(IR::Program& program);
|
||||
void CollectShaderInfoPass(IR::Program& program);
|
||||
void LowerSharedMemToRegisters(IR::Program& program);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void LowerSharedMemToRegisters(IR::Program& program) {
|
||||
boost::container::small_vector<IR::Inst*, 8> ds_writes;
|
||||
Info& info{program.info};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) {
|
||||
ds_writes.emplace_back(&inst);
|
||||
continue;
|
||||
}
|
||||
if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) {
|
||||
// Search for write instruction with same offset
|
||||
const IR::Inst* prod = inst.Arg(0).InstRecursive();
|
||||
const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
|
||||
const IR::Inst* write_prod = write->Arg(0).InstRecursive();
|
||||
return write_prod->Arg(1).U32() == prod->Arg(1).U32() &&
|
||||
write_prod->Arg(0) == prod->Arg(0);
|
||||
});
|
||||
ASSERT(it != ds_writes.end());
|
||||
// Replace data read with value written.
|
||||
inst.ReplaceUsesWith((*it)->Arg(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
// We should have eliminated everything. Invalidate data write instructions.
|
||||
for (const auto inst : ds_writes) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
|
@ -37,6 +37,10 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
|||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferFormatF32:
|
||||
case IR::Opcode::StoreBufferFormatF32x2:
|
||||
case IR::Opcode::StoreBufferFormatF32x3:
|
||||
case IR::Opcode::StoreBufferFormatF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
|
@ -73,6 +77,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
|||
case IR::Opcode::LoadBufferFormatF32x2:
|
||||
case IR::Opcode::LoadBufferFormatF32x3:
|
||||
case IR::Opcode::LoadBufferFormatF32x4:
|
||||
case IR::Opcode::StoreBufferFormatF32:
|
||||
case IR::Opcode::StoreBufferFormatF32x2:
|
||||
case IR::Opcode::StoreBufferFormatF32x3:
|
||||
case IR::Opcode::StoreBufferFormatF32x4:
|
||||
switch (num_format) {
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
|
@ -112,6 +120,10 @@ bool IsBufferStore(const IR::Inst& inst) {
|
|||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferFormatF32:
|
||||
case IR::Opcode::StoreBufferFormatF32x2:
|
||||
case IR::Opcode::StoreBufferFormatF32x3:
|
||||
case IR::Opcode::StoreBufferFormatF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
|
@ -171,6 +183,22 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
u32 ImageOffsetArgumentPosition(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageGather:
|
||||
case IR::Opcode::ImageGatherDref:
|
||||
return 2;
|
||||
case IR::Opcode::ImageSampleExplicitLod:
|
||||
case IR::Opcode::ImageSampleImplicitLod:
|
||||
return 3;
|
||||
case IR::Opcode::ImageSampleDrefExplicitLod:
|
||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
||||
return 4;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(Info& info_)
|
||||
|
@ -376,9 +404,11 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
|||
return -1;
|
||||
}
|
||||
// We have found this pattern. Build the sharp.
|
||||
std::array<u64, 2> buffer;
|
||||
std::array<u32, 4> buffer;
|
||||
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
|
||||
buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32;
|
||||
buffer[1] = 0;
|
||||
buffer[2] = handle->Arg(2).U32();
|
||||
buffer[3] = handle->Arg(3).U32();
|
||||
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
|
||||
// Assign a binding to this sharp.
|
||||
return descriptors.Add(BufferResource{
|
||||
|
@ -492,6 +522,13 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
const auto tsharp = TrackSharp(tsharp_handle);
|
||||
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.ReplaceUsesWith(
|
||||
ir.CompositeConstruct(ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f)));
|
||||
return;
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sgpr_base = tsharp.sgpr_base,
|
||||
|
@ -565,25 +602,43 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
|
||||
if (inst_info.has_offset) {
|
||||
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
|
||||
const bool is_gather = inst.GetOpcode() == IR::Opcode::ImageGather ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageGatherDref;
|
||||
const u32 arg_pos = is_gather ? 2 : (inst_info.is_depth ? 4 : 3);
|
||||
const u32 arg_pos = ImageOffsetArgumentPosition(inst);
|
||||
const IR::Value arg = inst.Arg(arg_pos);
|
||||
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
||||
const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); };
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 6, u32> x;
|
||||
BitField<8, 6, u32> y;
|
||||
BitField<16, 6, u32> z;
|
||||
} offset{arg.U32()};
|
||||
const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y));
|
||||
inst.SetArg(arg_pos, value);
|
||||
|
||||
const auto read = [&](u32 offset) -> auto {
|
||||
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
|
||||
};
|
||||
|
||||
switch (image.GetType()) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
inst.SetArg(arg_pos, read(0));
|
||||
break;
|
||||
case AmdGpu::ImageType::Color2D:
|
||||
case AmdGpu::ImageType::Color2DArray:
|
||||
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8)));
|
||||
break;
|
||||
case AmdGpu::ImageType::Color3D:
|
||||
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16)));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
if (inst_info.has_lod_clamp) {
|
||||
// Final argument contains lod_clamp
|
||||
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
|
||||
const u32 arg_pos = [&]() -> u32 {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageSampleImplicitLod:
|
||||
return 2;
|
||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
||||
return 3;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return inst_info.is_depth ? 5 : 4;
|
||||
}();
|
||||
inst.SetArg(arg_pos, arg);
|
||||
}
|
||||
if (inst_info.explicit_lod) {
|
||||
|
@ -591,7 +646,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod);
|
||||
const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3;
|
||||
inst.SetArg(pos, arg);
|
||||
const IR::Value value = inst_info.force_level0 ? ir.Imm32(0.f) : arg;
|
||||
inst.SetArg(pos, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,18 +16,6 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedS8:
|
||||
case IR::Opcode::LoadSharedU8:
|
||||
case IR::Opcode::WriteSharedU8:
|
||||
info.uses_shared_u8 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedS16:
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
info.uses_shared_u16 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
|
|
|
@ -58,6 +58,9 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
|||
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
||||
Shader::Optimization::ResourceTrackingPass(program);
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
if (program.info.stage != Stage::Compute) {
|
||||
Shader::Optimization::LowerSharedMemToRegisters(program);
|
||||
}
|
||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||
Shader::Optimization::CollectShaderInfoPass(program);
|
||||
|
|
|
@ -116,7 +116,7 @@ struct PushData {
|
|||
std::array<u8, 32> buf_offsets;
|
||||
|
||||
void AddOffset(u32 binding, u32 offset) {
|
||||
ASSERT(offset < 64 && binding < 32);
|
||||
ASSERT(offset < 256 && binding < buf_offsets.size());
|
||||
buf_offsets[binding] = offset;
|
||||
}
|
||||
};
|
||||
|
@ -195,8 +195,6 @@ struct Info {
|
|||
bool has_image_query{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_shared{};
|
||||
bool uses_shared_u8{};
|
||||
bool uses_shared_u16{};
|
||||
bool uses_fp16{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
|
|
|
@ -35,7 +35,7 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||
{
|
||||
std::unique_lock lk{submit_mutex};
|
||||
Common::CondvarWait(submit_cv, lk, stoken,
|
||||
[this] { return num_submits != 0 || submit_done; });
|
||||
[this] { return num_commands || num_submits || submit_done; });
|
||||
}
|
||||
if (stoken.stop_requested()) {
|
||||
break;
|
||||
|
@ -45,7 +45,23 @@ void Liverpool::Process(std::stop_token stoken) {
|
|||
|
||||
int qid = -1;
|
||||
|
||||
while (num_submits) {
|
||||
while (num_submits || num_commands) {
|
||||
|
||||
// Process incoming commands with high priority
|
||||
while (num_commands) {
|
||||
|
||||
Common::UniqueFunction<void> callback{};
|
||||
{
|
||||
std::unique_lock lk{submit_mutex};
|
||||
callback = std::move(command_queue.back());
|
||||
command_queue.pop();
|
||||
}
|
||||
|
||||
callback();
|
||||
|
||||
--num_commands;
|
||||
}
|
||||
|
||||
qid = (qid + 1) % NumTotalQueues;
|
||||
|
||||
auto& queue = mapped_queues[qid];
|
||||
|
@ -180,6 +196,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
|
||||
break;
|
||||
}
|
||||
case PM4CmdNop::PayloadType::DebugMarkerPush: {
|
||||
const auto marker_sz = nop->header.count.Value() * 2;
|
||||
const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
|
||||
marker_sz};
|
||||
rasterizer->ScopeMarkerBegin(label);
|
||||
break;
|
||||
}
|
||||
case PM4CmdNop::PayloadType::DebugMarkerPop: {
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -208,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
// In the case of HW, render target memory has alignment as color block operates on
|
||||
// tiles. There is no information of actual resource extents stored in CB context
|
||||
// regs, so any deduction of it from slices/pitch will lead to a larger surface created.
|
||||
// The same applies to the depth targets. Fortunatelly, the guest always sends
|
||||
// The same applies to the depth targets. Fortunately, the guest always sends
|
||||
// a trailing NOP packet right after the context regs setup, so we can use the heuristic
|
||||
// below and extract the hint to determine actual resource dims.
|
||||
|
||||
|
@ -226,7 +253,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
ASSERT(col_buf_id < NumColorBuffers);
|
||||
|
||||
const auto nop_offset = header->type3.count;
|
||||
if (nop_offset == 0x0e || nop_offset == 0x0d) {
|
||||
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
|
||||
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
|
||||
"NOP hint is missing in CB setup sequence");
|
||||
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
|
||||
|
@ -295,8 +322,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (rasterizer) {
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DrawIndex2", reinterpret_cast<const void*>(dcb.data())));
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(true);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
|
@ -308,8 +336,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.num_indices = draw_index_off->index_count;
|
||||
regs.draw_initiator = draw_index_off->draw_initiator;
|
||||
if (rasterizer) {
|
||||
rasterizer->ScopeMarkerBegin(fmt::format(
|
||||
"dcb:{}:DrawIndexOffset2", reinterpret_cast<const void*>(dcb.data())));
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(true, draw_index_off->index_offset);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
|
@ -320,8 +349,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (rasterizer) {
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast<const void*>(dcb.data())));
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->Draw(false);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
|
@ -334,8 +364,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:Dispatch", reinterpret_cast<const void*>(dcb.data())));
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->DispatchDirect();
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
|
@ -393,7 +424,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
}
|
||||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
// ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
// Optimization: VO label waits are special because the emulator
|
||||
// will write to the label when presentation is finished. So if
|
||||
// there are no other submits to yield to we can sleep the thread
|
||||
|
@ -486,8 +517,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||
regs.cs_program.dim_z = dispatch_direct->dim_z;
|
||||
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
|
||||
rasterizer->ScopeMarkerBegin(fmt::format(
|
||||
"acb[{}]:{}:Dispatch", vqid, reinterpret_cast<const void*>(acb.data())));
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
|
||||
rasterizer->Breadcrumb(u64(cmd_address));
|
||||
rasterizer->DispatchDirect();
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
|
|
|
@ -11,10 +11,12 @@
|
|||
#include <span>
|
||||
#include <thread>
|
||||
#include <queue>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
|
@ -766,7 +768,8 @@ struct Liverpool {
|
|||
}
|
||||
|
||||
TilingMode GetTilingMode() const {
|
||||
return attrib.tile_mode_index;
|
||||
return info.linear_general ? TilingMode::Display_Linear
|
||||
: attrib.tile_mode_index.Value();
|
||||
}
|
||||
|
||||
bool IsTiled() const {
|
||||
|
@ -866,6 +869,33 @@ struct Liverpool {
|
|||
}
|
||||
};
|
||||
|
||||
union ShaderStageEnable {
|
||||
u32 raw;
|
||||
BitField<0, 2, u32> ls_en;
|
||||
BitField<2, 1, u32> hs_en;
|
||||
BitField<3, 2, u32> es_en;
|
||||
BitField<5, 1, u32> gs_en;
|
||||
BitField<6, 1, u32> vs_en;
|
||||
|
||||
bool IsStageEnabled(u32 stage) {
|
||||
switch (stage) {
|
||||
case 0:
|
||||
case 1:
|
||||
return true;
|
||||
case 2:
|
||||
return gs_en.Value();
|
||||
case 3:
|
||||
return es_en.Value();
|
||||
case 4:
|
||||
return hs_en.Value();
|
||||
case 5:
|
||||
return ls_en.Value();
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
union Regs {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS(0x2C08);
|
||||
|
@ -944,7 +974,9 @@ struct Liverpool {
|
|||
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
|
||||
u32 vgt_instance_step_rate_0;
|
||||
u32 vgt_instance_step_rate_1;
|
||||
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
|
||||
INSERT_PADDING_WORDS(0xA2D5 - 0xA2A9 - 1);
|
||||
ShaderStageEnable stage_enable;
|
||||
INSERT_PADDING_WORDS(9);
|
||||
PolygonOffset poly_offset;
|
||||
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
|
||||
AaConfig aa_config;
|
||||
|
@ -1024,6 +1056,13 @@ public:
|
|||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void SendCommand(Common::UniqueFunction<void>&& func) {
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
command_queue.emplace(std::move(func));
|
||||
++num_commands;
|
||||
submit_cv.notify_one();
|
||||
}
|
||||
|
||||
private:
|
||||
struct Task {
|
||||
struct promise_type {
|
||||
|
@ -1092,9 +1131,11 @@ private:
|
|||
Libraries::VideoOut::VideoOutPort* vo_port{};
|
||||
std::jthread process_thread{};
|
||||
std::atomic<u32> num_submits{};
|
||||
std::atomic<u32> num_commands{};
|
||||
std::atomic<bool> submit_done{};
|
||||
std::mutex submit_mutex;
|
||||
std::condition_variable_any submit_cv;
|
||||
std::queue<Common::UniqueFunction<void>> command_queue{};
|
||||
};
|
||||
|
||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||
|
@ -1139,6 +1180,7 @@ static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
|||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
|
||||
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
|
||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
||||
|
|
|
@ -282,6 +282,13 @@ enum class InterruptSelect : u32 {
|
|||
IrqUndocumented = 3,
|
||||
};
|
||||
|
||||
static u64 GetGpuClock64() {
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
auto duration = now.time_since_epoch();
|
||||
auto ticks = std::chrono::duration_cast<std::chrono::nanoseconds>(duration).count();
|
||||
return static_cast<u64>(ticks);
|
||||
}
|
||||
|
||||
struct PM4CmdEventWriteEop {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
|
@ -325,6 +332,10 @@ struct PM4CmdEventWriteEop {
|
|||
*Address<u64>() = DataQWord();
|
||||
break;
|
||||
}
|
||||
case DataSelect::GpuClock64: {
|
||||
*Address<u64>() = GetGpuClock64();
|
||||
break;
|
||||
}
|
||||
case DataSelect::PerfCounter: {
|
||||
*Address<u64>() = Common::FencedRDTSC();
|
||||
break;
|
||||
|
@ -652,13 +663,6 @@ struct PM4CmdReleaseMem {
|
|||
return data_lo | u64(data_hi) << 32;
|
||||
}
|
||||
|
||||
uint64_t GetGpuClock64() const {
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
auto duration = now.time_since_epoch();
|
||||
auto ticks = std::chrono::duration_cast<std::chrono::nanoseconds>(duration).count();
|
||||
return static_cast<uint64_t>(ticks);
|
||||
}
|
||||
|
||||
void SignalFence(Platform::InterruptId irq_id) const {
|
||||
switch (data_sel.Value()) {
|
||||
case DataSelect::Data32Low: {
|
||||
|
|
|
@ -41,6 +41,7 @@ enum class PM4ItOpcode : u32 {
|
|||
CondIndirectBuffer = 0x3F,
|
||||
CopyData = 0x40,
|
||||
CommandProcessorDma = 0x41,
|
||||
PfpSyncMe = 0x42,
|
||||
SurfaceSync = 0x43,
|
||||
CondWrite = 0x45,
|
||||
EventWrite = 0x46,
|
||||
|
|
|
@ -106,10 +106,8 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_
|
|||
VmaAllocationInfo alloc_info{};
|
||||
buffer.Create(buffer_ci, usage, &alloc_info);
|
||||
|
||||
if (instance->HasDebuggingToolAttached()) {
|
||||
const auto device = instance->GetDevice();
|
||||
Vulkan::SetObjectName(device, Handle(), "Buffer {:#x} {} KiB", cpu_addr, size_bytes / 1024);
|
||||
}
|
||||
const auto device = instance->GetDevice();
|
||||
Vulkan::SetObjectName(device, Handle(), "Buffer {:#x}:{:#x}", cpu_addr, size_bytes);
|
||||
|
||||
// Map it if it is host visible.
|
||||
VkMemoryPropertyFlags property_flags{};
|
||||
|
@ -152,10 +150,8 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler&
|
|||
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
|
||||
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
|
||||
const auto device = instance.GetDevice();
|
||||
if (instance.HasDebuggingToolAttached()) {
|
||||
Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}): {} KiB", BufferTypeName(usage),
|
||||
size_bytes / 1024);
|
||||
}
|
||||
Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}):{:#x}", BufferTypeName(usage),
|
||||
size_bytes);
|
||||
}
|
||||
|
||||
std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment) {
|
||||
|
|
|
@ -146,6 +146,10 @@ public:
|
|||
return offset;
|
||||
}
|
||||
|
||||
u64 GetFreeSize() const {
|
||||
return size_bytes - offset - mapped_size;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Watch {
|
||||
u64 tick{};
|
||||
|
|
|
@ -87,6 +87,15 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
|
|||
}
|
||||
|
||||
bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
||||
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
|
||||
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
|
||||
SCOPE_EXIT {
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.setVertexInputEXT(bindings, attributes);
|
||||
}
|
||||
};
|
||||
|
||||
if (vs_info.vs_inputs.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -122,6 +131,21 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||
}
|
||||
guest_buffers.emplace_back(buffer);
|
||||
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
|
||||
attributes.push_back({
|
||||
.location = input.binding,
|
||||
.binding = input.binding,
|
||||
.format =
|
||||
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
|
||||
.offset = 0,
|
||||
});
|
||||
bindings.push_back({
|
||||
.binding = input.binding,
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None
|
||||
? vk::VertexInputRate::eVertex
|
||||
: vk::VertexInputRate::eInstance,
|
||||
.divisor = 1,
|
||||
});
|
||||
}
|
||||
|
||||
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
|
||||
|
@ -224,6 +248,19 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
|
|||
return {&buffer, buffer.Offset(device_addr)};
|
||||
}
|
||||
|
||||
std::pair<const Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
|
||||
const u64 page = gpu_addr >> CACHING_PAGEBITS;
|
||||
const BufferId buffer_id = page_table[page];
|
||||
if (buffer_id) {
|
||||
const Buffer& buffer = slot_buffers[buffer_id];
|
||||
if (buffer.IsInBounds(gpu_addr, size)) {
|
||||
return {&buffer, buffer.Offset(gpu_addr)};
|
||||
}
|
||||
}
|
||||
const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
|
||||
return {&staging_buffer, offset};
|
||||
}
|
||||
|
||||
bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
|
||||
const VAddr end_addr = addr + size;
|
||||
const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
|
||||
|
@ -248,6 +285,10 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
|
|||
return memory_tracker.IsRegionCpuModified(addr, size);
|
||||
}
|
||||
|
||||
bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
|
||||
return memory_tracker.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
|
||||
BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
|
||||
if (device_addr == 0) {
|
||||
return NULL_BUFFER_ID;
|
||||
|
|
|
@ -69,12 +69,18 @@ public:
|
|||
/// Obtains a buffer for the specified region.
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written);
|
||||
|
||||
/// Obtains a temporary buffer for usage in texture cache.
|
||||
[[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
|
||||
|
||||
/// Return true when a region is registered on the cache
|
||||
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
|
||||
|
||||
/// Return true when a CPU region is modified from the CPU
|
||||
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
||||
|
||||
/// Return true when a CPU region is modified from the GPU
|
||||
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
||||
|
||||
private:
|
||||
template <typename Func>
|
||||
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
|
||||
|
|
|
@ -81,6 +81,8 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
|
|||
return vk::PrimitiveTopology::eTriangleListWithAdjacency;
|
||||
case Liverpool::PrimitiveType::AdjTriangleStrip:
|
||||
return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
|
||||
case Liverpool::PrimitiveType::PatchPrimitive:
|
||||
return vk::PrimitiveTopology::ePatchList;
|
||||
case Liverpool::PrimitiveType::QuadList:
|
||||
// Needs to generate index buffer on the fly.
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
|
@ -339,6 +341,7 @@ std::span<const vk::Format> GetAllFormats() {
|
|||
vk::Format::eR32Sint,
|
||||
vk::Format::eR32Uint,
|
||||
vk::Format::eBc6HUfloatBlock,
|
||||
vk::Format::eBc6HSfloatBlock,
|
||||
vk::Format::eR16G16Unorm,
|
||||
vk::Format::eR16G16B16A16Sscaled,
|
||||
vk::Format::eR16G16Sscaled,
|
||||
|
@ -540,6 +543,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||
if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||
return vk::Format::eBc6HUfloatBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Snorm) {
|
||||
return vk::Format::eBc6HSfloatBlock;
|
||||
}
|
||||
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||
num_format == AmdGpu::NumberFormat::Sint) {
|
||||
return vk::Format::eR8G8B8A8Sint;
|
||||
|
|
|
@ -47,21 +47,22 @@ public:
|
|||
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
|
||||
VAddr cpu_address, bool is_eop) {
|
||||
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
|
||||
const auto image_id = texture_cache.FindImage(info, false);
|
||||
const auto image_id = texture_cache.FindImage(info);
|
||||
texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
return PrepareFrameInternal(image, is_eop);
|
||||
}
|
||||
|
||||
Frame* PrepareBlankFrame() {
|
||||
Frame* PrepareBlankFrame(bool is_eop) {
|
||||
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
|
||||
return PrepareFrameInternal(image, true);
|
||||
return PrepareFrameInternal(image, is_eop);
|
||||
}
|
||||
|
||||
VideoCore::Image& RegisterVideoOutSurface(
|
||||
const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
|
||||
vo_buffers_addr.emplace_back(cpu_address);
|
||||
const auto info = VideoCore::ImageInfo{attribute, cpu_address};
|
||||
const auto image_id = texture_cache.FindImage(info, false);
|
||||
const auto image_id = texture_cache.FindImage(info);
|
||||
return texture_cache.GetImage(image_id);
|
||||
}
|
||||
|
||||
|
@ -75,6 +76,11 @@ public:
|
|||
void Present(Frame* frame);
|
||||
void RecreateFrame(Frame* frame, u32 width, u32 height);
|
||||
|
||||
void FlushDraw() {
|
||||
SubmitInfo info{};
|
||||
draw_scheduler.Flush(info);
|
||||
}
|
||||
|
||||
private:
|
||||
Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true);
|
||||
Frame* GetRenderFrame();
|
||||
|
|
|
@ -96,7 +96,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
Shader::PushData push_data{};
|
||||
u32 binding{};
|
||||
|
||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
for (const auto& buffer : info.buffers) {
|
||||
const auto vsharp = buffer.GetVsharp(info);
|
||||
const VAddr address = vsharp.base_address;
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means we
|
||||
|
@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
}
|
||||
const u32 size = vsharp.GetSize();
|
||||
if (buffer.is_written) {
|
||||
texture_cache.InvalidateMemory(address, size);
|
||||
texture_cache.InvalidateMemory(address, size, true);
|
||||
}
|
||||
const u32 alignment =
|
||||
buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
|
||||
|
@ -137,7 +137,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
|||
: vk::DescriptorType::eUniformBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
i++;
|
||||
}
|
||||
|
||||
for (const auto& image_desc : info.images) {
|
||||
|
|
|
@ -145,6 +145,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||
dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT);
|
||||
dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT);
|
||||
}
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
|
||||
}
|
||||
|
||||
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
|
||||
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <fmt/ranges.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "sdl_window.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
@ -163,7 +164,8 @@ bool Instance::CreateDevice() {
|
|||
vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features,
|
||||
vk::PhysicalDeviceVulkan13Features,
|
||||
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
|
||||
vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
|
||||
vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT,
|
||||
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
|
||||
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
||||
vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>();
|
||||
|
@ -197,10 +199,12 @@ bool Instance::CreateDevice() {
|
|||
external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
||||
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||
add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||
const bool depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||
add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
|
||||
workgroup_memory_explicit_layout =
|
||||
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
|
||||
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
|
||||
// The next two extensions are required to be available together in order to support write masks
|
||||
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
|
||||
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||
|
@ -210,9 +214,21 @@ bool Instance::CreateDevice() {
|
|||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||
// with extensions.
|
||||
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
|
||||
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
||||
const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
||||
add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
||||
add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
|
||||
const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
||||
|
||||
if (has_sync2) {
|
||||
has_nv_checkpoints = Config::isMarkersEnabled()
|
||||
? add_extension(VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME)
|
||||
: false;
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Required by Vulkan spec if supported.
|
||||
add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
|
||||
#endif
|
||||
|
||||
const auto family_properties = physical_device.getQueueFamilyProperties();
|
||||
if (family_properties.empty()) {
|
||||
|
@ -308,21 +324,51 @@ bool Instance::CreateDevice() {
|
|||
vk::PhysicalDeviceRobustness2FeaturesEXT{
|
||||
.nullDescriptor = true,
|
||||
},
|
||||
vk::PhysicalDeviceSynchronization2Features{
|
||||
.synchronization2 = true,
|
||||
},
|
||||
vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
|
||||
.vertexInputDynamicState = true,
|
||||
},
|
||||
#ifdef __APPLE__
|
||||
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
|
||||
#endif
|
||||
};
|
||||
|
||||
if (!maintenance4) {
|
||||
device_chain.unlink<vk::PhysicalDeviceMaintenance4FeaturesKHR>();
|
||||
}
|
||||
if (!custom_border_color) {
|
||||
device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
|
||||
}
|
||||
if (!color_write_en) {
|
||||
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();
|
||||
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
|
||||
}
|
||||
if (!robustness) {
|
||||
if (!depth_clip_control) {
|
||||
device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
|
||||
}
|
||||
if (!workgroup_memory_explicit_layout) {
|
||||
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
|
||||
}
|
||||
if (robustness) {
|
||||
device_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor =
|
||||
feature_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor;
|
||||
} else {
|
||||
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
|
||||
}
|
||||
if (!vertex_input_dynamic_state) {
|
||||
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
|
||||
}
|
||||
|
||||
try {
|
||||
device = physical_device.createDeviceUnique(device_chain.get());
|
||||
} catch (vk::ExtensionNotPresentError& err) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what());
|
||||
return false;
|
||||
} catch (vk::FeatureNotPresentError& err) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Some required features are not available {}", err.what());
|
||||
return false;
|
||||
}
|
||||
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(*device);
|
||||
|
|
|
@ -88,6 +88,10 @@ public:
|
|||
return profiler_context;
|
||||
}
|
||||
|
||||
bool HasNvCheckpoints() const {
|
||||
return has_nv_checkpoints;
|
||||
}
|
||||
|
||||
/// Returns true when a known debugging tool is attached.
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_renderdoc || has_nsight_graphics;
|
||||
|
@ -128,6 +132,11 @@ public:
|
|||
return color_write_en;
|
||||
}
|
||||
|
||||
/// Returns true when VK_EXT_vertex_input_dynamic_state is supported.
|
||||
bool IsVertexInputDynamicState() const {
|
||||
return vertex_input_dynamic_state;
|
||||
}
|
||||
|
||||
/// Returns the vendor ID of the physical device
|
||||
u32 GetVendorID() const {
|
||||
return properties.vendorID;
|
||||
|
@ -253,12 +262,14 @@ private:
|
|||
bool external_memory_host{};
|
||||
bool workgroup_memory_explicit_layout{};
|
||||
bool color_write_en{};
|
||||
bool vertex_input_dynamic_state{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
bool debug_utils_supported{};
|
||||
bool has_nsight_graphics{};
|
||||
bool has_renderdoc{};
|
||||
bool has_nv_checkpoints{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -115,6 +115,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
}
|
||||
|
||||
const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
|
||||
// Tessellation is unsupported so skip the draw to avoid locking up the driver.
|
||||
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) {
|
||||
return nullptr;
|
||||
}
|
||||
RefreshGraphicsKey();
|
||||
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
|
||||
if (is_new) {
|
||||
|
@ -203,12 +207,20 @@ void PipelineCache::RefreshGraphicsKey() {
|
|||
}
|
||||
|
||||
for (u32 i = 0; i < MaxShaderStages; i++) {
|
||||
if (!regs.stage_enable.IsStageEnabled(i)) {
|
||||
key.stage_hashes[i] = 0;
|
||||
continue;
|
||||
}
|
||||
auto* pgm = regs.ProgramForStage(i);
|
||||
if (!pgm || !pgm->Address<u32*>()) {
|
||||
key.stage_hashes[i] = 0;
|
||||
continue;
|
||||
}
|
||||
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
|
||||
if (!bininfo->Valid()) {
|
||||
key.stage_hashes[i] = 0;
|
||||
continue;
|
||||
}
|
||||
key.stage_hashes[i] = bininfo->shader_hash;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -157,6 +157,10 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
|
|||
break;
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
|
||||
#endif
|
||||
|
||||
if (window_type != Frontend::WindowSystemType::Headless) {
|
||||
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
|
||||
}
|
||||
|
@ -221,12 +225,61 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT
|
|||
|
||||
vk::Bool32 enable_sync =
|
||||
enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False;
|
||||
vk::LayerSettingEXT layer_set = {
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "validate_sync",
|
||||
.type = vk::LayerSettingTypeEXT::eBool32,
|
||||
.valueCount = 1,
|
||||
.pValues = &enable_sync,
|
||||
vk::Bool32 enable_gpuav =
|
||||
enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False;
|
||||
const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled()
|
||||
? "GPU_BASED_GPU_ASSISTED"
|
||||
: "GPU_BASED_NONE";
|
||||
const std::array layer_setings = {
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "validate_sync",
|
||||
.type = vk::LayerSettingTypeEXT::eBool32,
|
||||
.valueCount = 1,
|
||||
.pValues = &enable_sync,
|
||||
},
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "sync_queue_submit",
|
||||
.type = vk::LayerSettingTypeEXT::eBool32,
|
||||
.valueCount = 1,
|
||||
.pValues = &enable_sync,
|
||||
},
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "validate_gpu_based",
|
||||
.type = vk::LayerSettingTypeEXT::eString,
|
||||
.valueCount = 1,
|
||||
.pValues = &gpuav_mode,
|
||||
},
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "gpuav_reserve_binding_slot",
|
||||
.type = vk::LayerSettingTypeEXT::eBool32,
|
||||
.valueCount = 1,
|
||||
.pValues = &enable_gpuav,
|
||||
},
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "gpuav_descriptor_checks",
|
||||
.type = vk::LayerSettingTypeEXT::eBool32,
|
||||
.valueCount = 1,
|
||||
.pValues = &enable_gpuav,
|
||||
},
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "gpuav_validate_indirect_buffer",
|
||||
.type = vk::LayerSettingTypeEXT::eBool32,
|
||||
.valueCount = 1,
|
||||
.pValues = &enable_gpuav,
|
||||
},
|
||||
vk::LayerSettingEXT{
|
||||
.pLayerName = VALIDATION_LAYER_NAME,
|
||||
.pSettingName = "gpuav_buffer_copies",
|
||||
.type = vk::LayerSettingTypeEXT::eBool32,
|
||||
.valueCount = 1,
|
||||
.pValues = &enable_gpuav,
|
||||
},
|
||||
};
|
||||
|
||||
vk::StructureChain<vk::InstanceCreateInfo, vk::LayerSettingsCreateInfoEXT> instance_ci_chain = {
|
||||
|
@ -236,10 +289,13 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT
|
|||
.ppEnabledLayerNames = layers.data(),
|
||||
.enabledExtensionCount = static_cast<u32>(extensions.size()),
|
||||
.ppEnabledExtensionNames = extensions.data(),
|
||||
#ifdef __APPLE__
|
||||
.flags = vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR,
|
||||
#endif
|
||||
},
|
||||
vk::LayerSettingsCreateInfoEXT{
|
||||
.settingCount = 1,
|
||||
.pSettings = &layer_set,
|
||||
.settingCount = layer_setings.size(),
|
||||
.pSettings = layer_setings.data(),
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
@ -152,7 +152,8 @@ void Rasterizer::BeginRendering() {
|
|||
.stencil = regs.stencil_clear}},
|
||||
};
|
||||
texture_cache.TouchMeta(htile_address, false);
|
||||
state.num_depth_attachments++;
|
||||
state.has_depth = true;
|
||||
state.has_stencil = image.info.usage.stencil;
|
||||
}
|
||||
scheduler.BeginRendering(state);
|
||||
}
|
||||
|
@ -230,16 +231,42 @@ void Rasterizer::UpdateDepthStencilState() {
|
|||
cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable);
|
||||
}
|
||||
|
||||
void Rasterizer::ScopeMarkerBegin(const std::string& str) {
|
||||
void Rasterizer::ScopeMarkerBegin(const std::string_view& str) {
|
||||
if (!Config::isMarkersEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||
.pLabelName = str.c_str(),
|
||||
.pLabelName = str.data(),
|
||||
});
|
||||
}
|
||||
|
||||
void Rasterizer::ScopeMarkerEnd() {
|
||||
if (!Config::isMarkersEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.endDebugUtilsLabelEXT();
|
||||
}
|
||||
|
||||
void Rasterizer::ScopedMarkerInsert(const std::string_view& str) {
|
||||
if (!Config::isMarkersEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.insertDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{
|
||||
.pLabelName = str.data(),
|
||||
});
|
||||
}
|
||||
|
||||
void Rasterizer::Breadcrumb(u64 id) {
|
||||
if (!instance.HasNvCheckpoints()) {
|
||||
return;
|
||||
}
|
||||
scheduler.CommandBuffer().setCheckpointNV(id);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -35,8 +35,10 @@ public:
|
|||
|
||||
void DispatchDirect();
|
||||
|
||||
void ScopeMarkerBegin(const std::string& str);
|
||||
void ScopeMarkerBegin(const std::string_view& str);
|
||||
void ScopeMarkerEnd();
|
||||
void ScopedMarkerInsert(const std::string_view& str);
|
||||
void Breadcrumb(u64 id);
|
||||
|
||||
void InvalidateMemory(VAddr addr, u64 size);
|
||||
void MapMemory(VAddr addr, u64 size);
|
||||
|
|
|
@ -38,8 +38,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
|
|||
.layerCount = 1,
|
||||
.colorAttachmentCount = render_state.num_color_attachments,
|
||||
.pColorAttachments = render_state.color_attachments.data(),
|
||||
.pDepthAttachment =
|
||||
render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr,
|
||||
.pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr,
|
||||
};
|
||||
|
||||
current_cmdbuf.beginRendering(rendering_info);
|
||||
|
@ -50,6 +49,8 @@ void Scheduler::EndRendering() {
|
|||
return;
|
||||
}
|
||||
is_rendering = false;
|
||||
current_cmdbuf.endRendering();
|
||||
|
||||
boost::container::static_vector<vk::ImageMemoryBarrier, 9> barriers;
|
||||
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
|
||||
barriers.push_back(vk::ImageMemoryBarrier{
|
||||
|
@ -70,10 +71,35 @@ void Scheduler::EndRendering() {
|
|||
},
|
||||
});
|
||||
}
|
||||
current_cmdbuf.endRendering();
|
||||
if (render_state.has_depth) {
|
||||
barriers.push_back(vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||
.oldLayout = render_state.depth_attachment.imageLayout,
|
||||
.newLayout = render_state.depth_attachment.imageLayout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = render_state.depth_image,
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eDepth |
|
||||
(render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil
|
||||
: vk::ImageAspectFlagBits::eNone),
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
if (!barriers.empty()) {
|
||||
current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||
vk::PipelineStageFlagBits::eFragmentShader,
|
||||
const auto src_stages =
|
||||
vk::PipelineStageFlagBits::eColorAttachmentOutput |
|
||||
(render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests |
|
||||
vk::PipelineStageFlagBits::eEarlyFragmentTests
|
||||
: vk::PipelineStageFlagBits::eNone);
|
||||
current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
|
||||
}
|
||||
}
|
||||
|
@ -158,6 +184,13 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
|
|||
try {
|
||||
instance.GetGraphicsQueue().submit(submit_info, info.fence);
|
||||
} catch (vk::DeviceLostError& err) {
|
||||
if (instance.HasNvCheckpoints()) {
|
||||
const auto checkpoint_data = instance.GetGraphicsQueue().getCheckpointData2NV();
|
||||
for (const auto& cp : checkpoint_data) {
|
||||
LOG_CRITICAL(Render_Vulkan, "{}: {:#x}", vk::to_string(cp.stage),
|
||||
reinterpret_cast<u64>(cp.pCheckpointMarker));
|
||||
}
|
||||
}
|
||||
UNREACHABLE_MSG("Device lost during submit: {}", err.what());
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,8 @@ struct RenderState {
|
|||
vk::RenderingAttachmentInfo depth_attachment{};
|
||||
vk::Image depth_image{};
|
||||
u32 num_color_attachments{};
|
||||
u32 num_depth_attachments{};
|
||||
bool has_depth{};
|
||||
bool has_stencil{};
|
||||
u32 width = std::numeric_limits<u32>::max();
|
||||
u32 height = std::numeric_limits<u32>::max();
|
||||
|
||||
|
|
|
@ -37,6 +37,16 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
|
|||
instance.GetPresentQueueFamilyIndex(),
|
||||
};
|
||||
|
||||
const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface);
|
||||
const auto find_mode = [&modes](vk::PresentModeKHR requested) {
|
||||
const auto it =
|
||||
std::find_if(modes.begin(), modes.end(),
|
||||
[&requested](vk::PresentModeKHR mode) { return mode == requested; });
|
||||
|
||||
return it != modes.end();
|
||||
};
|
||||
const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox);
|
||||
|
||||
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
|
||||
const u32 queue_family_indices_count = exclusive ? 1u : 2u;
|
||||
const vk::SharingMode sharing_mode =
|
||||
|
@ -55,7 +65,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
|
|||
.pQueueFamilyIndices = queue_family_indices.data(),
|
||||
.preTransform = transform,
|
||||
.compositeAlpha = composite_alpha,
|
||||
.presentMode = vk::PresentModeKHR::eMailbox,
|
||||
.presentMode = has_mailbox ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate,
|
||||
.clipped = true,
|
||||
.oldSwapchain = nullptr,
|
||||
};
|
||||
|
@ -83,6 +93,7 @@ bool Swapchain::AcquireNextImage() {
|
|||
case vk::Result::eSuboptimalKHR:
|
||||
case vk::Result::eErrorSurfaceLostKHR:
|
||||
case vk::Result::eErrorOutOfDateKHR:
|
||||
case vk::Result::eErrorUnknown:
|
||||
needs_recreation = true;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
@ -116,6 +117,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
||||
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
|
||||
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
||||
mip_hashes.resize(info.resources.levels);
|
||||
ASSERT(info.pixel_format != vk::Format::eUndefined);
|
||||
// Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
|
||||
// the texture cache should re-create the resource with the usage requested
|
||||
|
@ -154,6 +156,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
|||
};
|
||||
|
||||
image.Create(image_ci);
|
||||
|
||||
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}",
|
||||
info.guest_address, info.guest_size_bytes);
|
||||
}
|
||||
|
||||
void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,
|
||||
|
|
|
@ -111,6 +111,7 @@ struct Image {
|
|||
vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
|
||||
vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
|
||||
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
|
||||
boost::container::small_vector<u64, 14> mip_hashes;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -189,6 +189,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
|
|||
resources.layers = num_slices;
|
||||
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
|
||||
usage.depth_target = true;
|
||||
usage.stencil =
|
||||
buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
|
||||
|
||||
guest_address = buffer.Address();
|
||||
const auto depth_slice_sz = buffer.GetDepthSliceSize();
|
||||
|
@ -260,7 +262,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
|||
case AmdGpu::TilingMode::Display_MacroTiled:
|
||||
case AmdGpu::TilingMode::Texture_MacroTiled:
|
||||
case AmdGpu::TilingMode::Depth_MacroTiled: {
|
||||
// ASSERT(!props.is_cube && !props.is_block);
|
||||
ASSERT(!props.is_block);
|
||||
ASSERT(num_samples == 1);
|
||||
std::tie(mip_info.pitch, mip_info.size) =
|
||||
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);
|
||||
|
|
|
@ -92,6 +92,8 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer,
|
|||
bool is_vo_surface) noexcept {
|
||||
const auto base_format =
|
||||
Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat());
|
||||
range.base.layer = col_buffer.view.slice_start;
|
||||
range.extent.layers = col_buffer.NumSlices();
|
||||
format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(
|
||||
base_format, col_buffer.info.comp_swap.Value(), is_vo_surface);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <xxhash.h>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/page_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
@ -11,13 +12,11 @@
|
|||
|
||||
namespace VideoCore {
|
||||
|
||||
static constexpr u64 StreamBufferSize = 512_MB;
|
||||
static constexpr u64 PageShift = 12;
|
||||
|
||||
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
BufferCache& buffer_cache_, PageManager& tracker_)
|
||||
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
|
||||
staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize},
|
||||
tile_manager{instance, scheduler} {
|
||||
ImageInfo info;
|
||||
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
|
||||
|
@ -31,9 +30,12 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
|||
|
||||
TextureCache::~TextureCache() = default;
|
||||
|
||||
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
|
||||
void TextureCache::InvalidateMemory(VAddr address, size_t size, bool from_compute) {
|
||||
std::unique_lock lock{mutex};
|
||||
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
|
||||
if (from_compute && !image.Overlaps(address, size)) {
|
||||
return;
|
||||
}
|
||||
// Ensure image is reuploaded when accessed again.
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
// Untrack image, so the range is unprotected and the guest can write freely.
|
||||
|
@ -57,7 +59,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
|
|||
}
|
||||
}
|
||||
|
||||
ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
|
||||
ImageId TextureCache::FindImage(const ImageInfo& info) {
|
||||
if (info.guest_address == 0) [[unlikely]] {
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
|
@ -87,12 +89,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
|
|||
image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
|
||||
}
|
||||
|
||||
Image& image = slot_images[image_id];
|
||||
if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
|
||||
RefreshImage(image);
|
||||
TrackImage(image, image_id);
|
||||
}
|
||||
|
||||
return image_id;
|
||||
}
|
||||
|
||||
|
@ -119,6 +115,7 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
|||
|
||||
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(info);
|
||||
UpdateImage(image_id);
|
||||
Image& image = slot_images[image_id];
|
||||
auto& usage = image.info.usage;
|
||||
|
||||
|
@ -165,7 +162,8 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
|
|||
const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(image_info);
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
UpdateImage(image_id);
|
||||
|
||||
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
|
||||
vk::AccessFlagBits::eColorAttachmentWrite |
|
||||
|
@ -198,8 +196,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
|
|||
|
||||
ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info) {
|
||||
const ImageId image_id = FindImage(image_info, false);
|
||||
const ImageId image_id = FindImage(image_info);
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal
|
||||
|
@ -224,26 +223,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
|
|||
return RegisterImageView(image_id, view_info);
|
||||
}
|
||||
|
||||
void TextureCache::RefreshImage(Image& image) {
|
||||
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
|
||||
// Mark image as validated.
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
|
||||
scheduler.EndRendering();
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
|
||||
|
||||
vk::Buffer buffer{staging.Handle()};
|
||||
u32 offset{0};
|
||||
|
||||
auto upload_buffer = tile_manager.TryDetile(image);
|
||||
if (upload_buffer) {
|
||||
buffer = *upload_buffer;
|
||||
} else {
|
||||
// Upload data to the staging buffer.
|
||||
offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16);
|
||||
}
|
||||
|
||||
const auto& num_layers = image.info.resources.layers;
|
||||
const auto& num_mips = image.info.resources.levels;
|
||||
ASSERT(num_mips == image.info.mips_layout.size());
|
||||
|
@ -254,12 +237,23 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
const u32 height = std::max(image.info.size.height >> m, 1u);
|
||||
const u32 depth =
|
||||
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
|
||||
const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
|
||||
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
|
||||
|
||||
// Protect GPU modified resources from accidental reuploads.
|
||||
if (True(image.flags & ImageFlagBits::GpuModified) &&
|
||||
!buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) {
|
||||
const u8* addr = std::bit_cast<u8*>(image.info.guest_address);
|
||||
const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size);
|
||||
if (image.mip_hashes[m] == hash) {
|
||||
continue;
|
||||
}
|
||||
image.mip_hashes[m] = hash;
|
||||
}
|
||||
|
||||
image_copy.push_back({
|
||||
.bufferOffset = offset + mip_ofs * num_layers,
|
||||
.bufferRowLength = static_cast<uint32_t>(mip_pitch),
|
||||
.bufferImageHeight = static_cast<uint32_t>(mip_height),
|
||||
.bufferOffset = mip_ofs * num_layers,
|
||||
.bufferRowLength = static_cast<u32>(mip_pitch),
|
||||
.bufferImageHeight = static_cast<u32>(mip_height),
|
||||
.imageSubresource{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = m,
|
||||
|
@ -271,6 +265,32 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
});
|
||||
}
|
||||
|
||||
if (image_copy.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler;
|
||||
sched_ptr->EndRendering();
|
||||
|
||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
|
||||
|
||||
const VAddr image_addr = image.info.guest_address;
|
||||
const size_t image_size = image.info.guest_size_bytes;
|
||||
vk::Buffer buffer{};
|
||||
u32 offset{};
|
||||
if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) {
|
||||
buffer = *upload_buffer;
|
||||
} else {
|
||||
const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size);
|
||||
buffer = vk_buffer->Handle();
|
||||
offset = buf_offset;
|
||||
}
|
||||
|
||||
for (auto& copy : image_copy) {
|
||||
copy.bufferOffset += offset;
|
||||
}
|
||||
|
||||
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
}
|
||||
|
||||
|
|
|
@ -38,13 +38,13 @@ public:
|
|||
~TextureCache();
|
||||
|
||||
/// Invalidates any image in the logical page range.
|
||||
void InvalidateMemory(VAddr address, size_t size);
|
||||
void InvalidateMemory(VAddr address, size_t size, bool from_compute = false);
|
||||
|
||||
/// Evicts any images that overlap the unmapped range.
|
||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||
|
||||
/// Retrieves the image handle of the image with the provided attributes.
|
||||
[[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true);
|
||||
[[nodiscard]] ImageId FindImage(const ImageInfo& info);
|
||||
|
||||
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||
[[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,
|
||||
|
@ -58,8 +58,18 @@ public:
|
|||
[[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info,
|
||||
const ImageViewInfo& view_info);
|
||||
|
||||
/// Updates image contents if it was modified by CPU.
|
||||
void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
|
||||
Image& image = slot_images[image_id];
|
||||
if (False(image.flags & ImageFlagBits::CpuModified)) {
|
||||
return;
|
||||
}
|
||||
RefreshImage(image, custom_scheduler);
|
||||
TrackImage(image, image_id);
|
||||
}
|
||||
|
||||
/// Reuploads image contents.
|
||||
void RefreshImage(Image& image);
|
||||
void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr);
|
||||
|
||||
/// Retrieves the sampler that matches the provided S# descriptor.
|
||||
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);
|
||||
|
@ -170,7 +180,6 @@ private:
|
|||
Vulkan::Scheduler& scheduler;
|
||||
BufferCache& buffer_cache;
|
||||
PageManager& tracker;
|
||||
StreamBuffer staging;
|
||||
TileManager tile_manager;
|
||||
Common::SlotVector<Image> slot_images;
|
||||
Common::SlotVector<ImageView> slot_image_views;
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/image_view.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/texture_cache/tile_manager.h"
|
||||
|
||||
#include "video_core/host_shaders/detile_m32x1_comp.h"
|
||||
|
@ -187,6 +186,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
case vk::Format::eR32Sfloat:
|
||||
case vk::Format::eR32Uint:
|
||||
case vk::Format::eR16G16Sfloat:
|
||||
case vk::Format::eR16G16Unorm:
|
||||
return vk::Format::eR32Uint;
|
||||
case vk::Format::eBc1RgbaSrgbBlock:
|
||||
case vk::Format::eBc1RgbaUnormBlock:
|
||||
|
@ -194,6 +194,8 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
case vk::Format::eR32G32Sfloat:
|
||||
case vk::Format::eR32G32Uint:
|
||||
case vk::Format::eR16G16B16A16Unorm:
|
||||
case vk::Format::eR16G16B16A16Uint:
|
||||
case vk::Format::eR16G16B16A16Sfloat:
|
||||
return vk::Format::eR32G32Uint;
|
||||
case vk::Format::eBc2SrgbBlock:
|
||||
case vk::Format::eBc2UnormBlock:
|
||||
|
@ -397,7 +399,7 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
|
|||
const u32 image_size = image.info.guest_size_bytes;
|
||||
const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> {
|
||||
// Use stream buffer for smaller textures.
|
||||
if (image_size <= StreamBufferSize) {
|
||||
if (image_size <= stream_buffer.GetFreeSize()) {
|
||||
u32 offset = stream_buffer.Copy(image.info.guest_address, image_size);
|
||||
return {stream_buffer.Handle(), offset};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue