diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index a86c393f..08353fb3 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -59,6 +59,7 @@ endif() # SDL3 if (NOT TARGET SDL3::SDL3) + set(SDL_PIPEWIRE OFF) add_subdirectory(sdl3) endif() diff --git a/src/common/io_file.h b/src/common/io_file.h index 7cb3246c..2c3df3f6 100644 --- a/src/common/io_file.h +++ b/src/common/io_file.h @@ -205,9 +205,9 @@ public: return WriteSpan(string); } - static void WriteBytes(const std::filesystem::path path, std::span vec) { + static void WriteBytes(const std::filesystem::path path, std::span data) { IOFile out(path, FileAccessMode::Write); - out.Write(vec); + out.Write(data); } private: diff --git a/src/core/file_format/trp.cpp b/src/core/file_format/trp.cpp index cb55af2e..b4d4c95e 100644 --- a/src/core/file_format/trp.cpp +++ b/src/core/file_format/trp.cpp @@ -48,7 +48,7 @@ bool TRP::Extract(std::filesystem::path trophyPath) { return false; s64 seekPos = sizeof(TrpHeader); - std::filesystem::path trpFilesPath(std::filesystem::current_path() / "game_data" / + std::filesystem::path trpFilesPath(std::filesystem::current_path() / "user/game_data" / title / "TrophyFiles" / it.path().stem()); std::filesystem::create_directories(trpFilesPath / "Icons"); std::filesystem::create_directory(trpFilesPath / "Xml"); @@ -88,4 +88,4 @@ bool TRP::Extract(std::filesystem::path trophyPath) { index++; } return true; -} \ No newline at end of file +} diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index e06a4b6e..2f57c9f3 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -11,18 +11,12 @@ constexpr int RESERVED_HANDLES = 3; // First 3 handles are stdin,stdout,stderr void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder) { std::scoped_lock lock{m_mutex}; - - MntPair pair; - pair.host_path = host_folder.string(); - std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/'); - 
pair.guest_path = guest_folder; - - m_mnt_pairs.push_back(pair); + m_mnt_pairs.emplace_back(host_folder, guest_folder); } void MntPoints::Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder) { auto it = std::remove_if(m_mnt_pairs.begin(), m_mnt_pairs.end(), - [&](const MntPair& pair) { return pair.guest_path == guest_folder; }); + [&](const MntPair& pair) { return pair.mount == guest_folder; }); m_mnt_pairs.erase(it, m_mnt_pairs.end()); } @@ -31,47 +25,83 @@ void MntPoints::UnmountAll() { m_mnt_pairs.clear(); } -std::string MntPoints::GetHostDirectory(const std::string& guest_directory) { - std::scoped_lock lock{m_mutex}; - for (auto& pair : m_mnt_pairs) { - // horrible code but it works :D - int find = guest_directory.find(pair.guest_path); - if (find == 0) { - std::string npath = - guest_directory.substr(pair.guest_path.size(), guest_directory.size() - 1); - std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/'); - return pair.host_path + npath; - } +std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) { + const MntPair* mount = GetMount(guest_directory); + if (!mount) { + return guest_directory; } - return ""; -} -std::string MntPoints::GetHostFile(const std::string& guest_file) { - std::scoped_lock lock{m_mutex}; + // Nothing to do if getting the mount itself. 
+ if (guest_directory == mount->mount) { + return mount->host_path; + } - for (auto& pair : m_mnt_pairs) { - // horrible code but it works :D - int find = guest_file.find(pair.guest_path); - if (find != 0) { - continue; - } - std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1); - const auto host_path = pair.host_path + npath; -#ifndef _WIN64 - const std::filesystem::path path{host_path}; - if (!std::filesystem::exists(path)) { - const auto filename = Common::ToLower(path.filename()); - for (const auto& file : std::filesystem::directory_iterator(path.parent_path())) { - const auto exist_filename = Common::ToLower(file.path().filename()); - if (filename == exist_filename) { - return file.path(); - } - } - } -#endif + // Remove device (e.g /app0) from path to retrieve relative path. + u32 pos = mount->mount.size() + 1; + // Evil games like Turok2 pass double slashes e.g /app0//game.kpf + if (guest_directory[pos] == '/') { + pos++; + } + const auto rel_path = std::string_view(guest_directory).substr(pos); + const auto host_path = mount->host_path / rel_path; + if (!NeedsCaseInsensiveSearch) { return host_path; } - return ""; + + // If the path does not exist attempt to verify this. + // Retrieve parent path until we find one that exists. + path_parts.clear(); + auto current_path = host_path; + while (!std::filesystem::exists(current_path)) { + // We have probably cached this if it's a folder. + if (auto it = path_cache.find(current_path); it != path_cache.end()) { + current_path = it->second; + break; + } + path_parts.emplace_back(current_path.filename()); + current_path = current_path.parent_path(); + } + + // We have found an anchor. Traverse parts we recorded and see if they + // exist in filesystem but in different case. 
+ auto guest_path = current_path; + while (!path_parts.empty()) { + const auto& part = path_parts.back(); + const auto add_match = [&](const auto& host_part) { + current_path /= host_part; + guest_path /= part; + path_cache[guest_path] = current_path; + path_parts.pop_back(); + }; + + // Can happen when the mismatch is in upper folder. + if (std::filesystem::exists(current_path / part)) { + add_match(part); + continue; + } + const auto part_low = Common::ToLower(part.string()); + bool found_match = false; + for (const auto& path : std::filesystem::directory_iterator(current_path)) { + const auto candidate = path.path().filename(); + const auto filename = Common::ToLower(candidate.string()); + // Check if a filename matches in case insensitive manner. + if (filename != part_low) { + continue; + } + // We found a match, record the actual path in the cache. + add_match(candidate); + found_match = true; + break; + } + if (!found_match) { + // Opening the guest path will surely fail but at least gives + // a better error message than the empty path. + return host_path; + } + } + + // The path was found. 
+ return current_path; } int HandleTable::CreateHandle() { @@ -105,8 +135,7 @@ File* HandleTable::GetFile(int d) { return m_files.at(d - RESERVED_HANDLES); } -File* HandleTable::getFile(const std::string& host_name) { - std::scoped_lock lock{m_mutex}; +File* HandleTable::GetFile(const std::filesystem::path& host_name) { for (auto* file : m_files) { if (file != nullptr && file->m_host_name == host_name) { return file; diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index b31931d1..d636f8bf 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -7,28 +7,42 @@ #include #include #include +#include #include "common/io_file.h" namespace Core::FileSys { class MntPoints { +#ifdef _WIN64 + static constexpr bool NeedsCaseInsensiveSearch = false; +#else + static constexpr bool NeedsCaseInsensiveSearch = true; +#endif public: struct MntPair { - std::string host_path; - std::string guest_path; // e.g /app0/ + std::filesystem::path host_path; + std::string mount; // e.g /app0/ }; - MntPoints() = default; - virtual ~MntPoints() = default; + explicit MntPoints() = default; + ~MntPoints() = default; void Mount(const std::filesystem::path& host_folder, const std::string& guest_folder); void Unmount(const std::filesystem::path& host_folder, const std::string& guest_folder); void UnmountAll(); - std::string GetHostDirectory(const std::string& guest_directory); - std::string GetHostFile(const std::string& guest_file); + + std::filesystem::path GetHostPath(const std::string& guest_directory); + + const MntPair* GetMount(const std::string& guest_path) { + const auto it = std::ranges::find_if( + m_mnt_pairs, [&](const auto& mount) { return guest_path.starts_with(mount.mount); }); + return it == m_mnt_pairs.end() ? 
nullptr : &*it; + } private: std::vector m_mnt_pairs; + std::vector path_parts; + tsl::robin_map path_cache; std::mutex m_mutex; }; @@ -40,7 +54,7 @@ struct DirEntry { struct File { std::atomic_bool is_opened{}; std::atomic_bool is_directory{}; - std::string m_host_name; + std::filesystem::path m_host_name; std::string m_guest_name; Common::FS::IOFile f; std::vector dirents; @@ -56,7 +70,7 @@ public: int CreateHandle(); void DeleteHandle(int d); File* GetFile(int d); - File* getFile(const std::string& host_name); + File* GetFile(const std::filesystem::path& host_name); private: std::vector m_files; diff --git a/src/core/libraries/dialogs/ime_dialog.cpp b/src/core/libraries/dialogs/ime_dialog.cpp index 5e2c2ffc..e73c1881 100644 --- a/src/core/libraries/dialogs/ime_dialog.cpp +++ b/src/core/libraries/dialogs/ime_dialog.cpp @@ -59,9 +59,11 @@ int PS4_SYSV_ABI sceImeDialogGetStatus() { return g_ime_dlg_status; } -int PS4_SYSV_ABI sceImeDialogInit(const OrbisImeDialogParam* param, - const OrbisImeParamExtended* extended) { +int PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExtended* extended) { LOG_ERROR(Lib_ImeDialog, "(STUBBED) called"); + const std::wstring_view text = L"shadPS4"; + param->maxTextLength = text.size(); + std::memcpy(param->inputTextBuffer, text.data(), text.size() * sizeof(wchar_t)); g_ime_dlg_status = OrbisImeDialogStatus::ORBIS_IME_DIALOG_STATUS_RUNNING; return ORBIS_OK; } diff --git a/src/core/libraries/dialogs/ime_dialog.h b/src/core/libraries/dialogs/ime_dialog.h index 08c980a4..ffe42b31 100644 --- a/src/core/libraries/dialogs/ime_dialog.h +++ b/src/core/libraries/dialogs/ime_dialog.h @@ -174,8 +174,7 @@ int PS4_SYSV_ABI sceImeDialogGetPanelSize(); int PS4_SYSV_ABI sceImeDialogGetPanelSizeExtended(); int PS4_SYSV_ABI sceImeDialogGetResult(OrbisImeDialogResult* result); /*OrbisImeDialogStatus*/ int PS4_SYSV_ABI sceImeDialogGetStatus(); -int PS4_SYSV_ABI sceImeDialogInit(const OrbisImeDialogParam* param, - const 
OrbisImeParamExtended* extended); +int PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExtended* extended); int PS4_SYSV_ABI sceImeDialogInitInternal(); int PS4_SYSV_ABI sceImeDialogInitInternal2(); int PS4_SYSV_ABI sceImeDialogInitInternal3(); diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 2e475413..ead4ff23 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -346,7 +346,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { EqueueEvent kernel_event{}; kernel_event.event.ident = id; kernel_event.event.filter = SceKernelEvent::Filter::GraphicsCore; - kernel_event.event.flags = SceKernelEvent::Flags::Add; + // The library only sets EV_ADD but it is suspected the kernel driver forces EV_CLEAR + kernel_event.event.flags = SceKernelEvent::Flags::Clear; kernel_event.event.fflags = 0; kernel_event.event.data = id; kernel_event.event.udata = udata; @@ -649,6 +650,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset, cmdbuf[2] = instance_vgpr_offset == 0 ? 0 : (instance_vgpr_offset & 0xffffu) + sgpr_offset; cmdbuf[3] = 0; + cmdbuf += 4; WriteTrailingNop<3>(cmdbuf); return ORBIS_OK; } @@ -704,6 +706,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 cmdbuf[2] = instance_vgpr_offset == 0 ? 0 : (instance_vgpr_offset & 0xffffu) + sgpr_offset; cmdbuf[3] = 2; // auto index + cmdbuf += 4; WriteTrailingNop<3>(cmdbuf); return ORBIS_OK; } @@ -1409,9 +1412,8 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id, // repeat set shader functionality here as it is trivial. 
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = - PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], @@ -1468,18 +1470,44 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, // pointer to a stack memory, so the check will likely fail. To workaround it we will // repeat set shader functionality here as it is trivial. cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], vs_regs[1]); // SPI_SHADER_PGM_LO_VS - cmdbuf = - PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], + vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS/SPI_SHADER_PGM_RSRC2_VS + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetEsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetEsShader(u32* cmdbuf, u32 size, const u32* es_regs, u32 shader_modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size < 0x14) { + 
return -1; + } + + if (!es_regs) { + LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument"); + return -1; + } + + if (shader_modifier & 0xfcfffc3f) { + LOG_ERROR(Lib_GnmDriver, "Invalid modifier mask"); + return -1; + } + + if (es_regs[1] != 0) { + LOG_ERROR(Lib_GnmDriver, "Invalid shader address"); + return -1; + } + + const u32 var = + shader_modifier == 0 ? es_regs[2] : ((es_regs[2] & 0xfcfffc3f) | shader_modifier); + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0xc8u, es_regs[0], 0u); // SPI_SHADER_PGM_LO_ES + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0xcau, var, es_regs[3]); // SPI_SHADER_PGM_RSRC1_ES + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } @@ -1488,18 +1516,93 @@ int PS4_SYSV_ABI sceGnmSetGsRingSizes() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetGsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size < 0x1d) { + return -1; + } + + if (!gs_regs) { + LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument"); + return -1; + } + + if (gs_regs[1] != 0) { + LOG_ERROR(Lib_GnmDriver, "Invalid shader address"); + return -1; + } + + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x88u, gs_regs[0], 0u); // SPI_SHADER_PGM_LO_GS + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x8au, gs_regs[2], + gs_regs[3]); // SPI_SHADER_PGM_RSRC1_GS/SPI_SHADER_PGM_RSRC2_GS + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2e5u, gs_regs[4]); // VGT_STRMOUT_CONFIG + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x29bu, gs_regs[5]); // VGT_GS_OUT_PRIM_TYPE + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2e4u, gs_regs[6]); // VGT_GS_INSTANCE_CNT + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetHsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || 
size < 0x1E) { + return -1; + } + + if (!hs_regs) { + LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument"); + return -1; + } + + if (hs_regs[1] != 0) { + LOG_ERROR(Lib_GnmDriver, "Invalid shader address"); + return -1; + } + + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2], + hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5], + hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetLsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetLsShader(u32* cmdbuf, u32 size, const u32* ls_regs, u32 shader_modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size < 0x17) { + return -1; + } + + if (!ls_regs) { + LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument"); + return -1; + } + + const auto modifier_mask = ((shader_modifier & 0xfffffc3f) == 0) ? 0xfffffc3f : 0xfcfffc3f; + if (shader_modifier & modifier_mask) { + LOG_ERROR(Lib_GnmDriver, "Invalid modifier mask"); + return -1; + } + + if (ls_regs[1] != 0) { + LOG_ERROR(Lib_GnmDriver, "Invalid shader address"); + return -1; + } + + const u32 var = + shader_modifier == 0 ? 
ls_regs[2] : ((ls_regs[2] & modifier_mask) | shader_modifier); + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x148u, ls_regs[0], 0u); // SPI_SHADER_PGM_LO_LS + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x14bu, ls_regs[3]); // SPI_SHADER_PGM_RSRC2_LS + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x14au, var, ls_regs[3]); // SPI_SHADER_PGM_RSRC1_LS + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } @@ -1523,9 +1626,9 @@ s32 PS4_SYSV_ABI sceGnmSetPsShader(u32* cmdbuf, u32 size, const u32* ps_regs) { cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = PM4CmdSetData::SetContextReg( cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], @@ -1561,9 +1664,9 @@ s32 PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdbuf, u32 size, const u32* ps_regs) cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = PM4CmdSetData::SetContextReg( cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], @@ -2052,8 +2155,34 @@ int PS4_SYSV_ABI sceGnmUnregisterResource() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmUpdateGsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmUpdateGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) { + 
LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size < 0x1d) { + return -1; + } + + if (!gs_regs) { + LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument"); + return -1; + } + + if (gs_regs[1] != 0) { + LOG_ERROR(Lib_GnmDriver, "Invalid shader address"); + return -1; + } + + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x88u, gs_regs[0], 0u); // SPI_SHADER_PGM_LO_GS + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x8au, gs_regs[2], + gs_regs[3]); // SPI_SHADER_PGM_RSRC1_GS/SPI_SHADER_PGM_RSRC2_GS + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e02e5u, + gs_regs[4]); + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e029bu, + gs_regs[5]); + cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e02e4u, + gs_regs[6]); + + WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } @@ -2082,9 +2211,9 @@ s32 PS4_SYSV_ABI sceGnmUpdatePsShader(u32* cmdbuf, u32 size, const u32* ps_regs) cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = WritePacket( cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT update @@ -2127,9 +2256,9 @@ s32 PS4_SYSV_ABI sceGnmUpdatePsShader350(u32* cmdbuf, u32 size, const u32* ps_re cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = WritePacket( cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c4u, 
ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT update @@ -2173,7 +2302,8 @@ s32 PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, return -1; } - const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f | shader_modifier); + const u32 var = + shader_modifier == 0 ? vs_regs[2] : ((vs_regs[2] & 0xfcfffc3f) | shader_modifier); cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0207u, diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 808cdf51..8100b116 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -149,11 +149,11 @@ s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 shader_modifier); s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier); -int PS4_SYSV_ABI sceGnmSetEsShader(); +s32 PS4_SYSV_ABI sceGnmSetEsShader(u32* cmdbuf, u32 size, const u32* es_regs, u32 shader_modifier); int PS4_SYSV_ABI sceGnmSetGsRingSizes(); -int PS4_SYSV_ABI sceGnmSetGsShader(); -int PS4_SYSV_ABI sceGnmSetHsShader(); -int PS4_SYSV_ABI sceGnmSetLsShader(); +s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs); +s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4); +s32 PS4_SYSV_ABI sceGnmSetLsShader(u32* cmdbuf, u32 size, const u32* ls_regs, u32 shader_modifier); s32 PS4_SYSV_ABI sceGnmSetPsShader(u32* cmdbuf, u32 size, const u32* ps_regs); s32 PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdbuf, u32 size, const u32* ps_regs); int PS4_SYSV_ABI sceGnmSetResourceRegistrationUserMemory(); @@ -216,7 +216,7 @@ int 
PS4_SYSV_ABI sceGnmUnmapComputeQueue(); int PS4_SYSV_ABI sceGnmUnregisterAllResourcesForOwner(); int PS4_SYSV_ABI sceGnmUnregisterOwnerAndResources(); int PS4_SYSV_ABI sceGnmUnregisterResource(); -int PS4_SYSV_ABI sceGnmUpdateGsShader(); +s32 PS4_SYSV_ABI sceGnmUpdateGsShader(u32* cmdbuf, u32 size, const u32* gs_regs); int PS4_SYSV_ABI sceGnmUpdateHsShader(); s32 PS4_SYSV_ABI sceGnmUpdatePsShader(u32* cmdbuf, u32 size, const u32* ps_regs); s32 PS4_SYSV_ABI sceGnmUpdatePsShader350(u32* cmdbuf, u32 size, const u32* ps_regs); diff --git a/src/core/libraries/kernel/event_flag/event_flag_obj.cpp b/src/core/libraries/kernel/event_flag/event_flag_obj.cpp index 46429d40..fe0f34b8 100644 --- a/src/core/libraries/kernel/event_flag/event_flag_obj.cpp +++ b/src/core/libraries/kernel/event_flag/event_flag_obj.cpp @@ -73,7 +73,12 @@ int EventFlagInternal::Wait(u64 bits, WaitMode wait_mode, ClearMode clear_mode, int EventFlagInternal::Poll(u64 bits, WaitMode wait_mode, ClearMode clear_mode, u64* result) { u32 micros = 0; - return Wait(bits, wait_mode, clear_mode, result, &micros); + auto ret = Wait(bits, wait_mode, clear_mode, result, &micros); + if (ret == ORBIS_KERNEL_ERROR_ETIMEDOUT) { + // Poll returns EBUSY instead. 
+ ret = ORBIS_KERNEL_ERROR_EBUSY; + } + return ret; } void EventFlagInternal::Set(u64 bits) { diff --git a/src/core/libraries/kernel/event_queue.cpp b/src/core/libraries/kernel/event_queue.cpp index 18561d6b..7d5163cf 100644 --- a/src/core/libraries/kernel/event_queue.cpp +++ b/src/core/libraries/kernel/event_queue.cpp @@ -94,7 +94,7 @@ int EqueueInternal::GetTriggeredEvents(SceKernelEvent* ev, int num) { for (auto& event : m_events) { if (event.IsTriggered()) { - if (ev->flags & SceKernelEvent::Flags::Clear) { + if (event.event.flags & SceKernelEvent::Flags::Clear) { event.Reset(); } diff --git a/src/core/libraries/kernel/event_queues.cpp b/src/core/libraries/kernel/event_queues.cpp index 12f59e50..bb3d8ba7 100644 --- a/src/core/libraries/kernel/event_queues.cpp +++ b/src/core/libraries/kernel/event_queues.cpp @@ -7,8 +7,6 @@ #include "core/libraries/error_codes.h" #include "core/libraries/kernel/event_queues.h" -#include - namespace Libraries::Kernel { extern boost::asio::io_context io_context; @@ -136,8 +134,7 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* event.timer = std::make_unique( io_context, std::chrono::microseconds(total_us - HrTimerSpinlockThresholdUs)); - event.timer->async_wait( - std::bind(SmallTimerCallback, boost::asio::placeholders::error, eq, event.event)); + event.timer->async_wait(std::bind(SmallTimerCallback, std::placeholders::_1, eq, event.event)); if (!eq->AddEvent(event)) { return ORBIS_KERNEL_ERROR_ENOMEM; diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 466399e1..8734b964 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -13,7 +13,7 @@ namespace Libraries::Kernel { -std::vector GetDirectoryEntries(const std::string& path) { +std::vector GetDirectoryEntries(const std::filesystem::path& path) { std::vector files; for (const auto& entry : std::filesystem::directory_iterator(path)) { auto& 
dir_entry = files.emplace_back(); @@ -58,7 +58,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { if (directory) { file->is_directory = true; file->m_guest_name = path; - file->m_host_name = mnt->GetHostDirectory(file->m_guest_name); + file->m_host_name = mnt->GetHostPath(file->m_guest_name); if (!std::filesystem::is_directory(file->m_host_name)) { // directory doesn't exist h->DeleteHandle(handle); return ORBIS_KERNEL_ERROR_ENOTDIR; @@ -72,7 +72,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { } } else { file->m_guest_name = path; - file->m_host_name = mnt->GetHostFile(file->m_guest_name); + file->m_host_name = mnt->GetHostPath(file->m_guest_name); int e = 0; if (read) { e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read); @@ -165,8 +165,7 @@ int PS4_SYSV_ABI sceKernelUnlink(const char* path) { auto* h = Common::Singleton::Instance(); auto* mnt = Common::Singleton::Instance(); - std::string host_path = mnt->GetHostFile(path); - + const auto host_path = mnt->GetHostPath(path); if (host_path.empty()) { return SCE_KERNEL_ERROR_EACCES; } @@ -175,7 +174,7 @@ int PS4_SYSV_ABI sceKernelUnlink(const char* path) { return SCE_KERNEL_ERROR_EPERM; } - auto* file = h->getFile(host_path); + auto* file = h->GetFile(host_path); if (file != nullptr) { file->f.Unlink(); } @@ -250,7 +249,7 @@ int PS4_SYSV_ABI sceKernelMkdir(const char* path, u16 mode) { return SCE_KERNEL_ERROR_EINVAL; } auto* mnt = Common::Singleton::Instance(); - std::string dir_name = mnt->GetHostFile(path); + const auto dir_name = mnt->GetHostPath(path); if (std::filesystem::exists(dir_name)) { return SCE_KERNEL_ERROR_EEXIST; } @@ -279,7 +278,7 @@ int PS4_SYSV_ABI posix_mkdir(const char* path, u16 mode) { int PS4_SYSV_ABI sceKernelStat(const char* path, OrbisKernelStat* sb) { LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path); auto* mnt = Common::Singleton::Instance(); - const auto& path_name = mnt->GetHostFile(path); + const auto path_name = 
mnt->GetHostPath(path); std::memset(sb, 0, sizeof(OrbisKernelStat)); const bool is_dir = std::filesystem::is_directory(path_name); const bool is_file = std::filesystem::is_regular_file(path_name); @@ -314,7 +313,7 @@ int PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) { auto* mnt = Common::Singleton::Instance(); - std::string path_name = mnt->GetHostFile(path); + const auto path_name = mnt->GetHostPath(path); if (!std::filesystem::exists(path_name)) { return SCE_KERNEL_ERROR_ENOENT; } diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index fbf86cdf..16274236 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -222,7 +222,7 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg } auto* mnt = Common::Singleton::Instance(); - const auto path = mnt->GetHostFile(moduleFileName); + const auto path = mnt->GetHostPath(moduleFileName); // Load PRX module and relocate any modules that import it. auto* linker = Common::Singleton::Instance(); diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index a0e0e8f2..943098e8 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -318,7 +318,8 @@ int PS4_SYSV_ABI scePthreadAttrGetstackaddr(const ScePthreadAttr* attr, void** s return SCE_KERNEL_ERROR_EINVAL; } - int result = pthread_attr_getstackaddr(&(*attr)->pth_attr, stack_addr); + size_t stack_size = 0; + int result = pthread_attr_getstack(&(*attr)->pth_attr, stack_addr, &stack_size); return result == 0 ? 
SCE_OK : SCE_KERNEL_ERROR_EINVAL; } @@ -340,7 +341,10 @@ int PS4_SYSV_ABI scePthreadAttrSetstackaddr(ScePthreadAttr* attr, void* addr) { return SCE_KERNEL_ERROR_EINVAL; } - int result = pthread_attr_setstackaddr(&(*attr)->pth_attr, addr); + size_t stack_size = 0; + pthread_attr_getstacksize(&(*attr)->pth_attr, &stack_size); + + int result = pthread_attr_setstack(&(*attr)->pth_attr, addr, stack_size); return result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL; } @@ -831,6 +835,10 @@ int PS4_SYSV_ABI posix_pthread_mutexattr_destroy(ScePthreadMutexattr* attr) { return result; } +int PS4_SYSV_ABI posix_pthread_once(pthread_once_t* once_control, void (*init_routine)(void)) { + return pthread_once(once_control, init_routine); +} + int PS4_SYSV_ABI posix_pthread_mutexattr_setprotocol(ScePthreadMutexattr* attr, int protocol) { int result = scePthreadMutexattrSetprotocol(attr, protocol); LOG_INFO(Kernel_Pthread, "redirect to scePthreadMutexattrSetprotocol: result = {}", result); @@ -1002,17 +1010,7 @@ ScePthread PThreadPool::Create() { } } -#ifdef _WIN64 auto* ret = new PthreadInternal{}; -#else - // TODO: Linux specific hack - static u8* hint_address = reinterpret_cast(0x7FFFFC000ULL); - auto* ret = reinterpret_cast( - mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)); - hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB); -#endif - ret->is_free = false; ret->is_detached = false; ret->is_almost_done = false; @@ -1443,6 +1441,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { posix_pthread_condattr_destroy); LIB_FUNCTION("EjllaAqAPZo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_condattr_setclock); + LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once); // openorbis weird functions LIB_FUNCTION("7H0iTOciTLo", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_lock); diff --git a/src/core/libraries/libc/libc_stdio.cpp 
b/src/core/libraries/libc/libc_stdio.cpp index b1e94676..2b15bd36 100644 --- a/src/core/libraries/libc/libc_stdio.cpp +++ b/src/core/libraries/libc/libc_stdio.cpp @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/assert.h" +#include "common/logging/log.h" #include "common/singleton.h" #include "core/file_sys/fs.h" #include "core/libraries/libc/libc_stdio.h" @@ -10,11 +10,12 @@ namespace Libraries::LibC { std::FILE* PS4_SYSV_ABI ps4_fopen(const char* filename, const char* mode) { auto* mnt = Common::Singleton::Instance(); - FILE* f = std::fopen(mnt->GetHostFile(filename).c_str(), mode); + const auto host_path = mnt->GetHostPath(filename).string(); + FILE* f = std::fopen(host_path.c_str(), mode); if (f != nullptr) { - LOG_INFO(Lib_LibC, "fopen = {}", mnt->GetHostFile(filename).c_str()); + LOG_INFO(Lib_LibC, "fopen = {}", host_path); } else { - LOG_INFO(Lib_LibC, "fopen can't open = {}", mnt->GetHostFile(filename).c_str()); + LOG_INFO(Lib_LibC, "fopen can't open = {}", host_path); } return f; } diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 7fa6c7c0..db6d0964 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -15,7 +15,8 @@ #include "error_codes.h" namespace Libraries::SaveData { -static std::string g_mount_point = "/savedata0"; // temp mount point (todo) + +static constexpr std::string_view g_mount_point = "/savedata0"; // temp mount point (todo) std::string game_serial; int PS4_SYSV_ABI sceSaveDataAbort() { @@ -50,11 +51,11 @@ int PS4_SYSV_ABI sceSaveDataChangeInternal() { int PS4_SYSV_ABI sceSaveDataCheckBackupData(const OrbisSaveDataCheckBackupData* check) { auto* mnt = Common::Singleton::Instance(); - std::string mount_dir = mnt->GetHostFile(check->dirName->data); + const auto mount_dir = mnt->GetHostPath(check->dirName->data); if 
(!std::filesystem::exists(mount_dir)) { return ORBIS_SAVE_DATA_ERROR_NOT_FOUND; } - LOG_INFO(Lib_SaveData, "called = {}", mount_dir); + LOG_INFO(Lib_SaveData, "called = {}", mount_dir.string()); return ORBIS_OK; } @@ -344,14 +345,14 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getParam return false; } file.Seek(getParam->data->offset); - size_t nbytes = file.ReadRaw(getParam->data->buf, getParam->data->bufSize); + file.ReadRaw(getParam->data->buf, getParam->data->bufSize); LOG_INFO(Lib_SaveData, "called: bufSize = {}, offset = {}", getParam->data->bufSize, getParam->data->offset); } if (getParam->param != nullptr) { - Common::FS::IOFile file1(mount_dir / "param.txt", Common::FS::FileAccessMode::Read); - size_t nbytes = file1.ReadRaw(getParam->param, sizeof(OrbisSaveDataParam)); + Common::FS::IOFile file(mount_dir / "param.txt", Common::FS::FileAccessMode::Read); + file.ReadRaw(getParam->param, sizeof(OrbisSaveDataParam)); } return ORBIS_OK; @@ -421,13 +422,13 @@ int PS4_SYSV_ABI sceSaveDataIsMounted() { int PS4_SYSV_ABI sceSaveDataLoadIcon(const OrbisSaveDataMountPoint* mountPoint, OrbisSaveDataIcon* icon) { auto* mnt = Common::Singleton::Instance(); - std::string mount_dir = mnt->GetHostFile(mountPoint->data); - LOG_INFO(Lib_SaveData, "called: dir = {}", mount_dir); + const auto mount_dir = mnt->GetHostPath(mountPoint->data); + LOG_INFO(Lib_SaveData, "called: dir = {}", mount_dir.string()); if (icon != nullptr) { - Common::FS::IOFile file(mount_dir + "/save_data.png", Common::FS::FileAccessMode::Read); + Common::FS::IOFile file(mount_dir / "save_data.png", Common::FS::FileAccessMode::Read); icon->bufSize = file.GetSize(); - size_t nbytes = file.ReadRaw(icon->buf, icon->bufSize); + file.ReadRaw(icon->buf, icon->bufSize); } return ORBIS_OK; } @@ -436,6 +437,7 @@ s32 saveDataMount(u32 user_id, char* dir_name, u32 mount_mode, OrbisSaveDataMountResult* mount_result) { const auto& mount_dir = 
Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / std::to_string(user_id) / game_serial / dir_name; + auto* mnt = Common::Singleton::Instance(); switch (mount_mode) { case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY: case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR: @@ -444,9 +446,8 @@ s32 saveDataMount(u32 user_id, char* dir_name, u32 mount_mode, if (!std::filesystem::exists(mount_dir)) { return ORBIS_SAVE_DATA_ERROR_NOT_FOUND; } - auto* mnt = Common::Singleton::Instance(); mount_result->mount_status = 0; - std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16); + g_mount_point.copy(mount_result->mount_point.data, 16); mnt->Mount(mount_dir, mount_result->mount_point.data); } break; case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE: @@ -458,16 +459,15 @@ s32 saveDataMount(u32 user_id, char* dir_name, u32 mount_mode, ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON: case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF | ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON: { - auto* mnt = Common::Singleton::Instance(); if (std::filesystem::exists(mount_dir)) { - std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16); + g_mount_point.copy(mount_result->mount_point.data, 16); mnt->Mount(mount_dir, mount_result->mount_point.data); mount_result->required_blocks = 0; mount_result->mount_status = 0; return ORBIS_SAVE_DATA_ERROR_EXISTS; } if (std::filesystem::create_directories(mount_dir)) { - std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16); + g_mount_point.copy(mount_result->mount_point.data, 16); mnt->Mount(mount_dir, mount_result->mount_point.data); mount_result->mount_status = 1; } @@ -478,8 +478,7 @@ s32 saveDataMount(u32 user_id, char* dir_name, u32 mount_mode, if (!std::filesystem::exists(mount_dir)) { std::filesystem::create_directories(mount_dir); } - auto* mnt = Common::Singleton::Instance(); - std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16); + g_mount_point.copy(mount_result->mount_point.data, 16); 
mnt->Mount(mount_dir, mount_result->mount_point.data); mount_result->mount_status = 1; } break; @@ -561,12 +560,12 @@ int PS4_SYSV_ABI sceSaveDataRestoreLoadSaveDataMemory() { int PS4_SYSV_ABI sceSaveDataSaveIcon(const OrbisSaveDataMountPoint* mountPoint, const OrbisSaveDataIcon* icon) { auto* mnt = Common::Singleton::Instance(); - std::string mount_dir = mnt->GetHostFile(mountPoint->data); - LOG_INFO(Lib_SaveData, "called = {}", mount_dir); + const auto mount_dir = mnt->GetHostPath(mountPoint->data); + LOG_INFO(Lib_SaveData, "called = {}", mount_dir.string()); if (icon != nullptr) { - Common::FS::IOFile file(mount_dir + "/save_data.png", Common::FS::FileAccessMode::Write); - file.WriteRaw((void*)icon->buf, icon->bufSize); + Common::FS::IOFile file(mount_dir / "save_data.png", Common::FS::FileAccessMode::Write); + file.WriteRaw(icon->buf, icon->bufSize); } return ORBIS_OK; } @@ -585,12 +584,13 @@ int PS4_SYSV_ABI sceSaveDataSetParam(const OrbisSaveDataMountPoint* mountPoint, OrbisSaveDataParamType paramType, const void* paramBuf, size_t paramBufSize) { auto* mnt = Common::Singleton::Instance(); - std::string mount_dir = mnt->GetHostFile(mountPoint->data); - LOG_INFO(Lib_SaveData, "called = {}, mountPoint->data = {}", mount_dir, mountPoint->data); + const auto mount_dir = mnt->GetHostPath(mountPoint->data); + LOG_INFO(Lib_SaveData, "called = {}, mountPoint->data = {}", mount_dir.string(), + mountPoint->data); if (paramBuf != nullptr) { - Common::FS::IOFile file(mount_dir + "/param.txt", Common::FS::FileAccessMode::Write); - file.WriteRaw((void*)paramBuf, paramBufSize); + Common::FS::IOFile file(mount_dir / "param.txt", Common::FS::FileAccessMode::Write); + file.WriteRaw(paramBuf, paramBufSize); } return ORBIS_OK; } @@ -738,24 +738,23 @@ int PS4_SYSV_ABI sceSaveDataUmountSys() { int PS4_SYSV_ABI sceSaveDataUmountWithBackup(const OrbisSaveDataMountPoint* mountPoint) { LOG_ERROR(Lib_SaveData, "called = {}", std::string(mountPoint->data)); auto* mnt = 
Common::Singleton::Instance(); - std::string mount_dir = mnt->GetHostFile(mountPoint->data); + const auto mount_dir = mnt->GetHostPath(mountPoint->data); if (!std::filesystem::exists(mount_dir)) { return ORBIS_SAVE_DATA_ERROR_NOT_FOUND; - } else { - std::filesystem::path mnt_dir(mount_dir); - std::filesystem::create_directories(mnt_dir.parent_path() / "backup"); - - for (const auto& entry : std::filesystem::recursive_directory_iterator(mnt_dir)) { - const auto& path = entry.path(); - std::filesystem::path target_path = mnt_dir.parent_path() / "backup"; - - if (std::filesystem::is_regular_file(path)) { - std::filesystem::copy(path, target_path, - std::filesystem::copy_options::overwrite_existing); - } - } - mnt->Unmount(mount_dir, mountPoint->data); } + + std::filesystem::create_directories(mount_dir.parent_path() / "backup"); + + for (const auto& entry : std::filesystem::recursive_directory_iterator(mount_dir)) { + const auto& path = entry.path(); + const auto target_path = mount_dir.parent_path() / "backup"; + if (std::filesystem::is_regular_file(path)) { + std::filesystem::copy(path, target_path, + std::filesystem::copy_options::overwrite_existing); + } + } + + mnt->Unmount(mount_dir, mountPoint->data); return ORBIS_OK; } diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 51cfcf4c..8fbd69c4 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -50,13 +50,16 @@ s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, Kernel::EqueueEvent event{}; event.event.ident = SCE_VIDEO_OUT_EVENT_FLIP; event.event.filter = Kernel::SceKernelEvent::Filter::VideoOut; + // The library only sets EV_ADD but kernel driver forces EV_CLEAR + event.event.flags = Kernel::SceKernelEvent::Flags::Clear; event.event.udata = udata; event.event.fflags = 0; event.event.data = 0; event.data = port; + eq->AddEvent(event); port->flip_events.push_back(eq); - return 
eq->AddEvent(event); + return ORBIS_OK; } s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata) { @@ -74,13 +77,16 @@ s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handl Kernel::EqueueEvent event{}; event.event.ident = SCE_VIDEO_OUT_EVENT_VBLANK; event.event.filter = Kernel::SceKernelEvent::Filter::VideoOut; + // The library only sets EV_ADD but kernel driver forces EV_CLEAR + event.event.flags = Kernel::SceKernelEvent::Flags::Clear; event.event.udata = udata; event.event.fflags = 0; event.event.data = 0; event.data = port; + eq->AddEvent(event); port->vblank_events.push_back(eq); - return eq->AddEvent(event); + return ORBIS_OK; } s32 PS4_SYSV_ABI sceVideoOutRegisterBuffers(s32 handle, s32 startIndex, void* const* addresses, diff --git a/src/core/tls.cpp b/src/core/tls.cpp index 3f7bbceb..0c2d973b 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -9,7 +9,8 @@ #ifdef _WIN32 #include #else -#include +#include /* Definition of ARCH_* constants */ +#include /* Definition of SYS_* constants */ #endif namespace Core { @@ -89,47 +90,28 @@ static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGe #else -static pthread_key_t slot = 0; +static u32 slot = 0; void SetTcbBase(void* image_address) { - ASSERT(pthread_setspecific(slot, image_address) == 0); + asm volatile("wrgsbase %0" ::"r"(image_address) : "memory"); } Tcb* GetTcbBase() { - return reinterpret_cast(pthread_getspecific(slot)); + Tcb* tcb; + asm volatile("rdgsbase %0" : "=r"(tcb)::"memory"); + return tcb; } -static void AllocTcbKey() { - ASSERT(pthread_key_create(&slot, nullptr) == 0); -} +static void AllocTcbKey() {} static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGenerator& c) { using namespace Xbyak::util; const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size; - // Replace mov instruction with near jump to the trampoline. 
- static constexpr u32 NearJmpSize = 5; + // Replace fs read with gs read. auto patch = Xbyak::CodeGenerator(total_size, code); - patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR); - patch.nop(total_size - NearJmpSize); - - // Write the trampoline. - // The following logic is based on the glibc implementation of pthread_getspecific - // https://github.com/bminor/glibc/blob/29807a27/nptl/pthread_getspecific.c#L23 - static constexpr u32 PthreadKeySecondLevelSize = 32; - static constexpr u32 PthreadSpecificOffset = 0x510; - static constexpr u32 PthreadKeyDataSize = 16; - ASSERT(slot >= PthreadKeySecondLevelSize); - - const u32 idx1st = slot / PthreadKeySecondLevelSize; - const u32 idx2nd = slot % PthreadKeySecondLevelSize; const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg); - c.mov(target_reg, PthreadSpecificOffset); - c.putSeg(fs); - c.mov(target_reg, qword[target_reg + idx1st * 8]); // Load first level specific array. - c.mov(target_reg, qword[target_reg + idx2nd * 16 + - 8]); // Load data member of pthread_key_data our slot specifies. - c.jmp(code + total_size); // Return to the instruction right after the mov. 
+ patch.putSeg(gs); } #endif diff --git a/src/emulator.cpp b/src/emulator.cpp index a60aea1b..91d92cd6 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -148,39 +148,35 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", nullptr}, + {"libSceFiber.sprx", nullptr}, + {"libSceUlt.sprx", nullptr}, {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, {"libSceJpegEnc.sprx", nullptr}, - {"libSceJson2.sprx", nullptr}}}; + {"libSceJson2.sprx", nullptr}}, + }; std::vector found_modules; const auto& sys_module_path = Common::FS::GetUserPath(Common::FS::PathType::SysModuleDir); for (const auto& entry : std::filesystem::directory_iterator(sys_module_path)) { found_modules.push_back(entry.path()); } - for (auto it : ModulesToLoad) { - bool found = false; - std::filesystem::path foundpath; - for (auto f : found_modules) { - if (f.filename().string() == it.module_name) { - found = true; - foundpath = f; - break; - } + for (const auto& [module_name, init_func] : ModulesToLoad) { + const auto it = std::ranges::find_if( + found_modules, [&](const auto& path) { return path.filename() == module_name; }); + if (it != found_modules.end()) { + LOG_INFO(Loader, "Loading {}", it->string()); + linker->LoadModule(*it); + continue; } - if (found) { - LOG_INFO(Loader, "Loading {}", foundpath.string().c_str()); - linker->LoadModule(foundpath); + if (init_func) { + LOG_INFO(Loader, "Can't Load {} switching to HLE", module_name); + init_func(&linker->GetHLESymbols()); } else { - if (it.callback != nullptr) { - LOG_INFO(Loader, "Can't Load {} switching to HLE", it.module_name); - it.callback(&linker->GetHLESymbols()); - } else { - LOG_INFO(Loader, "No HLE available for 
{} module", it.module_name); - } + LOG_INFO(Loader, "No HLE available for {} module", module_name); } } } diff --git a/src/qt_gui/game_grid_frame.cpp b/src/qt_gui/game_grid_frame.cpp index ca28e9ce..80f4823b 100644 --- a/src/qt_gui/game_grid_frame.cpp +++ b/src/qt_gui/game_grid_frame.cpp @@ -114,8 +114,9 @@ void GameGridFrame::SetGridBackgroundImage(int row, int column) { if (item) { QString pic1Path = QString::fromStdString((*m_games_shared)[itemID].pic_path); QString blurredPic1Path = - qApp->applicationDirPath() + - QString::fromStdString("/game_data/" + (*m_games_shared)[itemID].serial + "/pic1.png"); + QDir::currentPath() + + QString::fromStdString("/user/game_data/" + (*m_games_shared)[itemID].serial + + "/pic1.png"); backgroundImage = QImage(blurredPic1Path); if (backgroundImage.isNull()) { @@ -123,7 +124,7 @@ void GameGridFrame::SetGridBackgroundImage(int row, int column) { backgroundImage = m_game_list_utils.BlurImage(image, image.rect(), 16); std::filesystem::path img_path = - std::filesystem::path("game_data/") / (*m_games_shared)[itemID].serial; + std::filesystem::path("user/game_data/") / (*m_games_shared)[itemID].serial; std::filesystem::create_directories(img_path); if (!backgroundImage.save(blurredPic1Path, "PNG")) { // qDebug() << "Error: Unable to save image."; @@ -142,4 +143,4 @@ void GameGridFrame::RefreshGridBackgroundImage() { palette.setColor(QPalette::Highlight, transparentColor); this->setPalette(palette); } -} \ No newline at end of file +} diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index 5bc00c7f..327da72f 100644 --- a/src/qt_gui/game_list_frame.cpp +++ b/src/qt_gui/game_list_frame.cpp @@ -99,8 +99,8 @@ void GameListFrame::SetListBackgroundImage(QTableWidgetItem* item) { QString pic1Path = QString::fromStdString(m_game_info->m_games[item->row()].pic_path); QString blurredPic1Path = - qApp->applicationDirPath() + - QString::fromStdString("/game_data/" + m_game_info->m_games[item->row()].serial + + 
QDir::currentPath() + + QString::fromStdString("/user/game_data/" + m_game_info->m_games[item->row()].serial + "/pic1.png"); backgroundImage = QImage(blurredPic1Path); @@ -109,7 +109,7 @@ void GameListFrame::SetListBackgroundImage(QTableWidgetItem* item) { backgroundImage = m_game_list_utils.BlurImage(image, image.rect(), 16); std::filesystem::path img_path = - std::filesystem::path("game_data/") / m_game_info->m_games[item->row()].serial; + std::filesystem::path("user/game_data/") / m_game_info->m_games[item->row()].serial; std::filesystem::create_directories(img_path); if (!backgroundImage.save(blurredPic1Path, "PNG")) { // qDebug() << "Error: Unable to save image."; @@ -206,4 +206,4 @@ void GameListFrame::SetRegionFlag(int row, int column, QString itemStr) { widget->setLayout(layout); this->setItem(row, column, item); this->setCellWidget(row, column, widget); -} \ No newline at end of file +} diff --git a/src/qt_gui/main.cpp b/src/qt_gui/main.cpp index 071e12b7..08c363b3 100644 --- a/src/qt_gui/main.cpp +++ b/src/qt_gui/main.cpp @@ -20,7 +20,7 @@ int main(int argc, char* argv[]) { // Load configurations and initialize Qt application const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); Config::load(config_dir / "config.toml"); - QString gameDataPath = qApp->applicationDirPath() + "/game_data/"; + QString gameDataPath = QDir::currentPath() + "/user/game_data/"; std::string stdStr = gameDataPath.toStdString(); std::filesystem::path path(stdStr); #ifdef _WIN64 diff --git a/src/qt_gui/trophy_viewer.cpp b/src/qt_gui/trophy_viewer.cpp index 8c28019e..4293745e 100644 --- a/src/qt_gui/trophy_viewer.cpp +++ b/src/qt_gui/trophy_viewer.cpp @@ -19,7 +19,7 @@ TrophyViewer::TrophyViewer(QString trophyPath, QString gameTrpPath) : QMainWindo } void TrophyViewer::PopulateTrophyWidget(QString title) { - QString trophyDir = qApp->applicationDirPath() + "/game_data/" + title + "/TrophyFiles"; + QString trophyDir = QDir::currentPath() + "/user/game_data/" + 
title + "/TrophyFiles"; QDir dir(trophyDir); if (!dir.exists()) { std::filesystem::path path(gameTrpPath_.toStdString()); @@ -142,4 +142,4 @@ void TrophyViewer::SetTableItem(QTableWidget* parent, int row, int column, QStri widget->setLayout(layout); parent->setItem(row, column, item); parent->setCellWidget(row, column, widget); -} \ No newline at end of file +} diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 75ee3ae9..c88a1cbb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -4,6 +4,8 @@ #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include + namespace Shader::Backend::SPIRV { namespace { @@ -209,57 +211,216 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen ctx.OpStore(pointer, value); } -Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { - const auto info = inst->Flags(); - const auto& buffer = ctx.buffers[handle]; - if (info.index_enable && info.offset_enable) { - UNREACHABLE(); - } else if (info.index_enable) { - const Id ptr{ - ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, address)}; - return ctx.OpLoad(buffer.data_types->Get(1), ptr); - } - UNREACHABLE(); -} - Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { return EmitLoadBufferF32(ctx, inst, handle, address); } -Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { - const auto info = inst->Flags(); +template +static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { const auto& buffer = ctx.buffers[handle]; - boost::container::static_vector ids; - for (u32 i = 0; i < 2; i++) { - const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))}; - 
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; - ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr)); + Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + if constexpr (N == 1) { + const Id ptr{ + ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, address)}; + return ctx.OpLoad(buffer.data_types->Get(1), ptr); + } else { + boost::container::static_vector ids; + for (u32 i = 0; i < N; i++) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); + const Id ptr{ + ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; + ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr)); + } + return ctx.OpCompositeConstruct(buffer.data_types->Get(N), ids); } - return ctx.OpCompositeConstruct(buffer.data_types->Get(2), ids); } -Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { - const auto info = inst->Flags(); - const auto& buffer = ctx.buffers[handle]; - boost::container::static_vector ids; - for (u32 i = 0; i < 3; i++) { - const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))}; - const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; - ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr)); - } - return ctx.OpCompositeConstruct(buffer.data_types->Get(3), ids); +Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferF32xN<1>(ctx, handle, address); } -Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { - const auto info = inst->Flags(); - const auto& buffer = ctx.buffers[handle]; - boost::container::static_vector ids; - for (u32 i = 0; i < 4; i++) { - const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))}; - const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; - ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr)); +Id 
EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferF32xN<2>(ctx, handle, address); +} + +Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferF32xN<3>(ctx, handle, address); +} + +Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferF32xN<4>(ctx, handle, address); +} + +static bool IsSignedInteger(AmdGpu::NumberFormat format) { + switch (format) { + case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Uscaled: + case AmdGpu::NumberFormat::Uint: + return false; + case AmdGpu::NumberFormat::Snorm: + case AmdGpu::NumberFormat::Sscaled: + case AmdGpu::NumberFormat::Sint: + case AmdGpu::NumberFormat::SnormNz: + return true; + case AmdGpu::NumberFormat::Float: + default: + UNREACHABLE(); } - return ctx.OpCompositeConstruct(buffer.data_types->Get(4), ids); +} + +static u32 UXBitsMax(u32 bit_width) { + return (1u << bit_width) - 1u; +} + +static u32 SXBitsMax(u32 bit_width) { + return (1u << (bit_width - 1u)) - 1u; +} + +static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, u32 bit_width) { + switch (format) { + case AmdGpu::NumberFormat::Unorm: + return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width)))); + case AmdGpu::NumberFormat::Snorm: + return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width)))); + case AmdGpu::NumberFormat::SnormNz: + // (x * 2 + 1) / (Format::SMAX * 2) + value = ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(2.f)); + value = ctx.OpFAdd(ctx.F32[1], value, ctx.ConstF32(1.f)); + return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width) * 2))); + case AmdGpu::NumberFormat::Uscaled: + case AmdGpu::NumberFormat::Sscaled: + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + case AmdGpu::NumberFormat::Float: + return value; + default: + UNREACHABLE_MSG("Unsupported number fromat for conversion: {}", + 
magic_enum::enum_name(format)); + } +} + +static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offset) { + Id comp_offset = ctx.ConstU32(bit_offset); + if (stride < 4) { + // comp_offset += (address % 4) * 8; + const Id byte_offset = ctx.OpUMod(ctx.U32[1], address, ctx.ConstU32(4u)); + const Id bit_offset = ctx.OpShiftLeftLogical(ctx.U32[1], byte_offset, ctx.ConstU32(3u)); + comp_offset = ctx.OpIAdd(ctx.U32[1], comp_offset, bit_offset); + } + return comp_offset; +} + +static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) { + const auto& buffer = ctx.buffers[handle]; + const auto format = buffer.buffer.GetDataFmt(); + switch (format) { + case AmdGpu::DataFormat::FormatInvalid: + return ctx.f32_zero_value; + case AmdGpu::DataFormat::Format8: + case AmdGpu::DataFormat::Format16: + case AmdGpu::DataFormat::Format32: + case AmdGpu::DataFormat::Format8_8: + case AmdGpu::DataFormat::Format16_16: + case AmdGpu::DataFormat::Format10_11_11: + case AmdGpu::DataFormat::Format11_11_10: + case AmdGpu::DataFormat::Format10_10_10_2: + case AmdGpu::DataFormat::Format2_10_10_10: + case AmdGpu::DataFormat::Format8_8_8_8: + case AmdGpu::DataFormat::Format32_32: + case AmdGpu::DataFormat::Format16_16_16_16: + case AmdGpu::DataFormat::Format32_32_32: + case AmdGpu::DataFormat::Format32_32_32_32: { + const u32 num_components = AmdGpu::NumComponents(format); + if (comp >= num_components) { + return ctx.f32_zero_value; + } + + // uint index = address / 4; + Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const u32 stride = buffer.buffer.GetStride(); + if (stride > 4) { + const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32); + if (index_offset > 0) { + // index += index_offset; + index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(index_offset)); + } + } + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index); + + const u32 bit_offset = 
AmdGpu::ComponentOffset(format, comp) % 32; + const u32 bit_width = AmdGpu::ComponentBits(format, comp); + const auto num_format = buffer.buffer.GetNumberFmt(); + if (num_format == AmdGpu::NumberFormat::Float) { + if (bit_width == 32) { + return ctx.OpLoad(ctx.F32[1], ptr); + } else if (bit_width == 16) { + const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset); + Id value = ctx.OpLoad(ctx.U32[1], ptr); + value = + ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width)); + value = ctx.OpSConvert(ctx.U16, value); + value = ctx.OpBitcast(ctx.F16[1], value); + return ctx.OpFConvert(ctx.F32[1], value); + } else { + UNREACHABLE_MSG("Invalid float bit width {}", bit_width); + } + } else { + Id value = ctx.OpLoad(ctx.U32[1], ptr); + const bool is_signed = IsSignedInteger(num_format); + if (bit_width < 32) { + const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset); + if (is_signed) { + value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, + ctx.ConstU32(bit_width)); + value = ctx.OpConvertSToF(ctx.F32[1], value); + } else { + value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset, + ctx.ConstU32(bit_width)); + value = ctx.OpConvertUToF(ctx.F32[1], value); + } + } else { + if (is_signed) { + value = ctx.OpConvertSToF(ctx.F32[1], value); + } else { + value = ctx.OpConvertUToF(ctx.F32[1], value); + } + } + return ConvertValue(ctx, value, num_format, bit_width); + } + break; + } + default: + UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format)); + } +} + +template +static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { + if constexpr (N == 1) { + return GetBufferFormatValue(ctx, handle, address, 0); + } else { + boost::container::static_vector ids; + for (u32 i = 0; i < N; i++) { + ids.push_back(GetBufferFormatValue(ctx, handle, address, i)); + } + return ctx.OpCompositeConstruct(ctx.F32[N], ids); + } +} + +Id 
EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { + return EmitLoadBufferFormatF32xN<1>(ctx, inst, handle, address); +} + +Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { + return EmitLoadBufferFormatF32xN<2>(ctx, inst, handle, address); +} + +Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { + return EmitLoadBufferFormatF32xN<3>(ctx, inst, handle, address); +} + +Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { + return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address); } void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 2d35b97c..17def57a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -9,6 +9,9 @@ namespace Shader::Backend::SPIRV { struct ImageOperands { void Add(spv::ImageOperandsMask new_mask, Id value) { + if (!Sirit::ValidId(value)) { + return; + } mask = static_cast(static_cast(mask) | static_cast(new_mask)); operands.push_back(value); @@ -25,9 +28,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - if (Sirit::ValidId(offset)) { - operands.Add(spv::ImageOperandsMask::ConstOffset, offset); - } + operands.Add(spv::ImageOperandsMask::Offset, offset); return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands); } @@ -61,18 +62,29 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); } -Id 
EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2) { - UNREACHABLE_MSG("SPIR-V Instruction"); -} - -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, - const IR::Value& offset, const IR::Value& offset2, Id dref) { +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref); + const u32 comp = inst->Flags().gather_comp.Value(); + ImageOperands operands; + operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.Add(spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask, + operands.operands); +} + +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, + Id offset2, Id dref) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id image = ctx.OpLoad(texture.image_type, texture.id); + const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); + const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); + ImageOperands operands; + operands.Add(spv::ImageOperandsMask::Offset, offset); + return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask, + operands.operands); } Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e0b19f4f..e2b411e4 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -66,6 +66,10 @@ Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); @@ -354,10 +358,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id bias_lc, const IR::Value& offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id bias_lc, Id offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, - const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, + Id offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, Id ms); Id 
EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 61ed29d5..3ea01a1d 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -13,10 +13,12 @@ std::string_view StageName(Stage stage) { switch (stage) { case Stage::Vertex: return "vs"; - case Stage::TessellationControl: - return "tcs"; - case Stage::TessellationEval: - return "tes"; + case Stage::Local: + return "ls"; + case Stage::Export: + return "es"; + case Stage::Hull: + return "hs"; case Stage::Geometry: return "gs"; case Stage::Fragment: @@ -299,9 +301,7 @@ void EmitContext::DefineBuffers(const Info& info) { for (u32 i = 0; const auto& buffer : info.buffers) { const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; - const u32 stride = buffer.stride == 0 ? 
1 : buffer.stride; - const u32 num_elements = stride * buffer.num_records; - const Id record_array_type{TypeArray(data_type, ConstU32(num_elements))}; + const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))}; const Id struct_type{TypeStruct(record_array_type)}; if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { Decorate(record_array_type, spv::Decoration::ArrayStride, 4); @@ -331,6 +331,7 @@ void EmitContext::DefineBuffers(const Info& info) { .id = id, .data_types = data_types, .pointer_type = pointer_type, + .buffer = buffer.GetVsharp(info), }); interfaces.push_back(id); i++; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index b51edd63..0f8081fd 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -201,6 +201,7 @@ public: Id id; const VectorIds* data_types; Id pointer_type; + AmdGpu::Buffer buffer; }; u32& binding; diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index cc631ff2..51840537 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -7,7 +7,7 @@ namespace Shader::Gcn { void Translator::EXP(const GcnInst& inst) { - if (ir.block->has_multiple_predecessors) { + if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) { LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation"); ir.Discard(ir.LogicalNot(ir.GetExec())); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 96f08519..6e5f7f8b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -254,8 +254,7 @@ void Translator::EmitFetch(const GcnInst& inst) { 
info.buffers.push_back({ .sgpr_base = attrib.sgpr_base, .dword_offset = attrib.dword_offset, - .stride = buffer.GetStride(), - .num_records = buffer.num_records, + .length = buffer.num_records, .used_types = IR::Type::F32, .is_storage = true, // we may not fit into UBO with large meshes .is_instance_data = true, @@ -457,6 +456,8 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l translator.IMAGE_GET_LOD(inst); break; case Opcode::IMAGE_GATHER4_C: + case Opcode::IMAGE_GATHER4_LZ: + case Opcode::IMAGE_GATHER4_LZ_O: translator.IMAGE_GATHER(inst); break; case Opcode::IMAGE_STORE: @@ -571,28 +572,40 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l translator.V_CNDMASK_B32(inst); break; case Opcode::TBUFFER_LOAD_FORMAT_X: - translator.BUFFER_LOAD_FORMAT(1, true, inst); + translator.BUFFER_LOAD_FORMAT(1, true, true, inst); break; case Opcode::TBUFFER_LOAD_FORMAT_XY: - translator.BUFFER_LOAD_FORMAT(2, true, inst); + translator.BUFFER_LOAD_FORMAT(2, true, true, inst); break; case Opcode::TBUFFER_LOAD_FORMAT_XYZ: - translator.BUFFER_LOAD_FORMAT(3, true, inst); + translator.BUFFER_LOAD_FORMAT(3, true, true, inst); break; case Opcode::TBUFFER_LOAD_FORMAT_XYZW: - translator.BUFFER_LOAD_FORMAT(4, true, inst); + translator.BUFFER_LOAD_FORMAT(4, true, true, inst); break; case Opcode::BUFFER_LOAD_FORMAT_X: - case Opcode::BUFFER_LOAD_DWORD: - translator.BUFFER_LOAD_FORMAT(1, false, inst); + translator.BUFFER_LOAD_FORMAT(1, false, true, inst); + break; + case Opcode::BUFFER_LOAD_FORMAT_XY: + translator.BUFFER_LOAD_FORMAT(2, false, true, inst); break; case Opcode::BUFFER_LOAD_FORMAT_XYZ: - case Opcode::BUFFER_LOAD_DWORDX3: - translator.BUFFER_LOAD_FORMAT(3, false, inst); + translator.BUFFER_LOAD_FORMAT(3, false, true, inst); break; case Opcode::BUFFER_LOAD_FORMAT_XYZW: + translator.BUFFER_LOAD_FORMAT(4, false, true, inst); + break; + case Opcode::BUFFER_LOAD_DWORD: + translator.BUFFER_LOAD_FORMAT(1, false, false, inst); + break; + case 
Opcode::BUFFER_LOAD_DWORDX2: + translator.BUFFER_LOAD_FORMAT(2, false, false, inst); + break; + case Opcode::BUFFER_LOAD_DWORDX3: + translator.BUFFER_LOAD_FORMAT(3, false, false, inst); + break; case Opcode::BUFFER_LOAD_DWORDX4: - translator.BUFFER_LOAD_FORMAT(4, false, inst); + translator.BUFFER_LOAD_FORMAT(4, false, false, inst); break; case Opcode::BUFFER_STORE_FORMAT_X: case Opcode::BUFFER_STORE_DWORD: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 6dd0a481..2aa6f712 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -160,7 +160,7 @@ public: void V_CMP_CLASS_F32(const GcnInst& inst); // Vector Memory - void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); + void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); // Vector interpolation diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index f0ef85b3..f4383c61 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -158,6 +158,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); info.force_level0.Assign(flags.test(MimgModifier::Level0)); info.explicit_lod.Assign(explicit_lod); + info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); // Issue IR instruction, leaving unknown fields blank to patch later. 
const IR::Value texel = [&]() -> IR::Value { @@ -225,7 +226,8 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { ir.ImageWrite(handle, body, value, {}); } -void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) { +void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, + const GcnInst& inst) { const auto& mtbuf = inst.control.mtbuf; const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; @@ -254,7 +256,8 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info); + const IR::Value value = is_format ? ir.LoadBufferFormat(num_dwords, handle, address, info) + : ir.LoadBuffer(num_dwords, handle, address, info); const IR::VectorReg dst_reg{inst.src[1].code}; if (num_dwords == 1) { ir.SetVectorReg(dst_reg, IR::F32{value}); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 5dabbb4c..cd4fdaa2 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -327,6 +327,22 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad } } +Value IREmitter::LoadBufferFormat(int num_dwords, const Value& handle, const Value& address, + BufferInstInfo info) { + switch (num_dwords) { + case 1: + return Inst(Opcode::LoadBufferFormatF32, Flags{info}, handle, address); + case 2: + return Inst(Opcode::LoadBufferFormatF32x2, Flags{info}, handle, address); + case 3: + return Inst(Opcode::LoadBufferFormatF32x3, Flags{info}, handle, address); + case 4: + return Inst(Opcode::LoadBufferFormatF32x4, Flags{info}, handle, address); + default: + UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); + } +} + void 
IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info) { switch (num_dwords) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 5d6fd714..e7512430 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -89,6 +89,8 @@ public: [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, BufferInstInfo info); + [[nodiscard]] Value LoadBufferFormat(int num_dwords, const Value& handle, const Value& address, + BufferInstInfo info); void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 94ef1784..9aefc8b3 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -79,6 +79,10 @@ OPCODE(LoadBufferF32, F32, Opaq OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, ) OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, ) OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, ) +OPCODE(LoadBufferFormatF32, F32, Opaque, Opaque, ) +OPCODE(LoadBufferFormatF32x2, F32x2, Opaque, Opaque, ) +OPCODE(LoadBufferFormatF32x3, F32x3, Opaque, Opaque, ) +OPCODE(LoadBufferFormatF32x4, F32x4, Opaque, Opaque, ) OPCODE(LoadBufferU32, U32, Opaque, Opaque, ) OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, ) OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index b7d6a722..6526ece6 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -27,6 +27,10 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::LoadBufferF32x2: case IR::Opcode::LoadBufferF32x3: case IR::Opcode::LoadBufferF32x4: + case 
IR::Opcode::LoadBufferFormatF32: + case IR::Opcode::LoadBufferFormatF32x2: + case IR::Opcode::LoadBufferFormatF32x3: + case IR::Opcode::LoadBufferFormatF32x4: case IR::Opcode::LoadBufferU32: case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBufferU32: @@ -41,8 +45,49 @@ bool IsBufferInstruction(const IR::Inst& inst) { } } -IR::Type BufferDataType(const IR::Inst& inst) { +static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { + switch (num_format) { + case AmdGpu::NumberFormat::Float: + switch (data_format) { + case AmdGpu::DataFormat::Format16: + case AmdGpu::DataFormat::Format16_16: + case AmdGpu::DataFormat::Format16_16_16_16: + return true; + default: + return false; + } + case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Snorm: + case AmdGpu::NumberFormat::Uscaled: + case AmdGpu::NumberFormat::Sscaled: + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + case AmdGpu::NumberFormat::SnormNz: + default: + return false; + } +} + +IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { switch (inst.GetOpcode()) { + case IR::Opcode::LoadBufferFormatF32: + case IR::Opcode::LoadBufferFormatF32x2: + case IR::Opcode::LoadBufferFormatF32x3: + case IR::Opcode::LoadBufferFormatF32x4: + switch (num_format) { + case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Snorm: + case AmdGpu::NumberFormat::Uscaled: + case AmdGpu::NumberFormat::Sscaled: + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + case AmdGpu::NumberFormat::SnormNz: + return IR::Type::U32; + case AmdGpu::NumberFormat::Float: + return IR::Type::F32; + default: + UNREACHABLE(); + } case IR::Opcode::LoadBufferF32: case IR::Opcode::LoadBufferF32x2: case IR::Opcode::LoadBufferF32x3: @@ -141,7 +186,7 @@ public: desc.inline_cbuf == existing.inline_cbuf; })}; auto& buffer = buffer_resources[index]; - ASSERT(buffer.stride == desc.stride && buffer.num_records == desc.num_records); + 
ASSERT(buffer.length == desc.length); buffer.is_storage |= desc.is_storage; buffer.used_types |= desc.used_types; return index; @@ -263,6 +308,41 @@ SharpLocation TrackSharp(const IR::Inst* inst) { static constexpr size_t MaxUboSize = 65536; +static bool IsLoadBufferFormat(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadBufferFormatF32: + case IR::Opcode::LoadBufferFormatF32x2: + case IR::Opcode::LoadBufferFormatF32x3: + case IR::Opcode::LoadBufferFormatF32x4: + return true; + default: + return false; + } +} + +static bool IsReadConstBuffer(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::ReadConstBuffer: + case IR::Opcode::ReadConstBufferU32: + return true; + default: + return false; + } +} + +static u32 BufferLength(const AmdGpu::Buffer& buffer) { + const auto stride = buffer.GetStride(); + if (stride < sizeof(f32)) { + ASSERT(sizeof(f32) % stride == 0); + return (((buffer.num_records - 1) / sizeof(f32)) + 1) * stride; + } else if (stride == sizeof(f32)) { + return buffer.num_records; + } else { + ASSERT(stride % sizeof(f32) == 0); + return buffer.num_records * (stride / sizeof(f32)); + } +} + s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, AmdGpu::Buffer& cbuf) { @@ -298,9 +378,8 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, return descriptors.Add(BufferResource{ .sgpr_base = std::numeric_limits::max(), .dword_offset = 0, - .stride = cbuf.GetStride(), - .num_records = u32(cbuf.num_records), - .used_types = BufferDataType(inst), + .length = BufferLength(cbuf), + .used_types = BufferDataType(inst, cbuf.GetNumberFmt()), .inline_cbuf = cbuf, .is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize, }); @@ -318,9 +397,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, binding = descriptors.Add(BufferResource{ .sgpr_base = sharp.sgpr_base, .dword_offset = sharp.dword_offset, - .stride = buffer.GetStride(), - 
.num_records = u32(buffer.num_records), - .used_types = BufferDataType(inst), + .length = BufferLength(buffer), + .used_types = BufferDataType(inst, buffer.GetNumberFmt()), .is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize, }); } @@ -337,24 +415,31 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, inst_info.dmft == AmdGpu::DataFormat::Format32_32 || inst_info.dmft == AmdGpu::DataFormat::Format32)); } - if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer || - inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) { + + if (IsReadConstBuffer(inst)) { return; } - // Calculate buffer address. - const u32 dword_stride = buffer.GetStrideElements(sizeof(u32)); - const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32); - IR::U32 address = ir.Imm32(dword_offset); - if (inst_info.index_enable && inst_info.offset_enable) { - const IR::U32 offset{ir.CompositeExtract(inst.Arg(1), 1)}; - const IR::U32 index{ir.CompositeExtract(inst.Arg(1), 0)}; - address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address); - address = ir.IAdd(address, ir.ShiftRightLogical(offset, ir.Imm32(2))); - } else if (inst_info.index_enable) { - const IR::U32 index{inst.Arg(1)}; - address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address); - } else if (inst_info.offset_enable) { - const IR::U32 offset{inst.Arg(1)}; + + if (IsLoadBufferFormat(inst)) { + if (UseFP16(buffer.GetDataFmt(), buffer.GetNumberFmt())) { + info.uses_fp16 = true; + } + } else { + const u32 stride = buffer.GetStride(); + ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}", + stride); + } + + IR::U32 address = ir.Imm32(inst_info.inst_offset.Value()); + if (inst_info.index_enable) { + const IR::U32 index = inst_info.offset_enable ? 
IR::U32{ir.CompositeExtract(inst.Arg(1), 0)} + : IR::U32{inst.Arg(1)}; + address = ir.IAdd(address, ir.IMul(index, ir.Imm32(buffer.GetStride()))); + } + if (inst_info.offset_enable) { + const IR::U32 offset = inst_info.index_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)} + : IR::U32{inst.Arg(1)}; + address = ir.IAdd(address, offset); } inst.SetArg(1, address); } @@ -458,7 +543,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip if (inst_info.has_offset) { // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. - const u32 arg_pos = inst_info.is_depth ? 4 : 3; + const bool is_gather = inst.GetOpcode() == IR::Opcode::ImageGather || + inst.GetOpcode() == IR::Opcode::ImageGatherDref; + const u32 arg_pos = is_gather ? 2 : (inst_info.is_depth ? 4 : 3); const IR::Value arg = inst.Arg(arg_pos); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); }; diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 8a24a68b..6a43ad6b 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -173,7 +173,7 @@ public: } template - IR::Value ReadVariable(Type variable, IR::Block* root_block) { + IR::Value ReadVariable(Type variable, IR::Block* root_block, bool is_thread_bit = false) { boost::container::small_vector, 64> stack{ ReadState(nullptr), ReadState(root_block), @@ -201,7 +201,7 @@ public: } else if (!block->IsSsaSealed()) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + phi->SetFlags(is_thread_bit ? 
IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; @@ -214,7 +214,7 @@ public: } else { // Break potential cycles with operandless phi IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); WriteVariable(variable, block, IR::Value{phi}); @@ -263,7 +263,9 @@ private: template IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const imm_pred : block->ImmPredecessors()) { - phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); + const bool is_thread_bit = + std::is_same_v && phi.Flags() == IR::Type::U1; + phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred, is_thread_bit)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } @@ -346,7 +348,8 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetThreadBitScalarReg: case IR::Opcode::GetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; - inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + inst.ReplaceUsesWith( + pass.ReadVariable(reg, block, opcode == IR::Opcode::GetThreadBitScalarReg)); break; } case IR::Opcode::GetVectorRegister: { diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index ae38ecf3..d9e9b030 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -39,6 +39,7 @@ union TextureInstInfo { BitField<3, 1, u32> force_level0; BitField<4, 1, u32> explicit_lod; BitField<5, 1, u32> has_offset; + BitField<6, 2, u32> gather_comp; }; union BufferInstInfo { diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 054faafe..8824e344 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -17,11 +17,12 @@ namespace 
Shader { static constexpr size_t NumUserDataRegs = 16; enum class Stage : u32 { - Vertex, - TessellationControl, - TessellationEval, - Geometry, Fragment, + Vertex, + Geometry, + Export, + Hull, + Local, Compute, }; constexpr u32 MaxStageTypes = 6; @@ -73,8 +74,7 @@ struct Info; struct BufferResource { u32 sgpr_base; u32 dword_offset; - u32 stride; - u32 num_records; + u32 length; IR::Type used_types; AmdGpu::Buffer inline_cbuf; bool is_storage{false}; @@ -204,7 +204,7 @@ struct fmt::formatter { return ctx.begin(); } auto format(const Shader::Stage& stage, format_context& ctx) const { - constexpr static std::array names = {"vs", "tc", "te", "gs", "fs", "cs"}; + constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); } }; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 0711b074..84539c28 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -837,7 +837,15 @@ struct Liverpool { ShaderProgram ps_program; INSERT_PADDING_WORDS(0x2C); ShaderProgram vs_program; - INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16); + INSERT_PADDING_WORDS(0x2C); + ShaderProgram gs_program; + INSERT_PADDING_WORDS(0x2C); + ShaderProgram es_program; + INSERT_PADDING_WORDS(0x2C); + ShaderProgram hs_program; + INSERT_PADDING_WORDS(0x2C); + ShaderProgram ls_program; + INSERT_PADDING_WORDS(0xA4); ComputeProgram cs_program; INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; @@ -916,12 +924,19 @@ struct Liverpool { const ShaderProgram* ProgramForStage(u32 index) const { switch (index) { case 0: - return &vs_program; - case 4: return &ps_program; - default: - return nullptr; + case 1: + return &vs_program; + case 2: + return &gs_program; + case 3: + return &es_program; + case 4: + return &hs_program; + case 5: + return &ls_program; } + return nullptr; } }; @@ -1018,7 +1033,7 @@ private: 
Vulkan::Rasterizer* rasterizer{}; std::jthread process_thread{}; - u32 num_submits{}; + std::atomic num_submits{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; }; @@ -1026,6 +1041,10 @@ private: static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); +static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88); +static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8); +static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08); +static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48); static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00); static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C); diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index 5f6eb903..6618e72a 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ b/src/video_core/amdgpu/pixel_format.cpp @@ -66,4 +66,110 @@ int NumBits(DataFormat format) { return num_bits_per_element[index]; } +static constexpr std::array component_bits = { + std::array{0, 0, 0, 0}, // 0 FormatInvalid + std::array{8, 0, 0, 0}, // 1 Format8 + std::array{16, 0, 0, 0}, // 2 Format16 + std::array{8, 8, 0, 0}, // 3 Format8_8 + std::array{32, 0, 0, 0}, // 4 Format32 + std::array{16, 16, 0, 0}, // 5 Format16_16 + std::array{10, 11, 11, 0}, // 6 Format10_11_11 + std::array{11, 11, 10, 0}, // 7 Format11_11_10 + std::array{10, 10, 10, 2}, // 8 Format10_10_10_2 + std::array{2, 10, 10, 10}, // 9 Format2_10_10_10 + std::array{8, 8, 8, 8}, // 10 Format8_8_8_8 + std::array{32, 32, 0, 0}, // 11 Format32_32 + std::array{16, 16, 16, 16}, // 12 Format16_16_16_16 + std::array{32, 32, 32, 0}, // 13 Format32_32_32 + std::array{32, 32, 32, 32}, // 14 Format32_32_32_32 + std::array{0, 0, 0, 0}, // 15 + std::array{5, 6, 5, 0}, // 16 Format5_6_5 + std::array{1, 5, 5, 5}, // 17 Format1_5_5_5 + std::array{5, 
5, 5, 1}, // 18 Format5_5_5_1 + std::array{4, 4, 4, 4}, // 19 Format4_4_4_4 + std::array{8, 24, 0, 0}, // 20 Format8_24 + std::array{24, 8, 0, 0}, // 21 Format24_8 + std::array{24, 8, 0, 0}, // 22 FormatX24_8_32 + std::array{0, 0, 0, 0}, // 23 + std::array{0, 0, 0, 0}, // 24 + std::array{0, 0, 0, 0}, // 25 + std::array{0, 0, 0, 0}, // 26 + std::array{0, 0, 0, 0}, // 27 + std::array{0, 0, 0, 0}, // 28 + std::array{0, 0, 0, 0}, // 29 + std::array{0, 0, 0, 0}, // 30 + std::array{0, 0, 0, 0}, // 31 + std::array{0, 0, 0, 0}, // 32 FormatGB_GR + std::array{0, 0, 0, 0}, // 33 FormatBG_RG + std::array{0, 0, 0, 0}, // 34 Format5_9_9_9 + std::array{0, 0, 0, 0}, // 35 FormatBc1 + std::array{0, 0, 0, 0}, // 36 FormatBc2 + std::array{0, 0, 0, 0}, // 37 FormatBc3 + std::array{0, 0, 0, 0}, // 38 FormatBc4 + std::array{0, 0, 0, 0}, // 39 FormatBc5 + std::array{0, 0, 0, 0}, // 40 FormatBc6 + std::array{0, 0, 0, 0}, // 41 FormatBc7 +}; + +u32 ComponentBits(DataFormat format, u32 comp) { + const u32 index = static_cast(format); + if (index >= component_bits.size() || comp >= 4) { + return 0; + } + return component_bits[index][comp]; +} + +static constexpr std::array component_offset = { + std::array{-1, -1, -1, -1}, // 0 FormatInvalid + std::array{0, -1, -1, -1}, // 1 Format8 + std::array{0, -1, -1, -1}, // 2 Format16 + std::array{0, 8, -1, -1}, // 3 Format8_8 + std::array{0, -1, -1, -1}, // 4 Format32 + std::array{0, 16, -1, -1}, // 5 Format16_16 + std::array{0, 10, 21, -1}, // 6 Format10_11_11 + std::array{0, 11, 22, -1}, // 7 Format11_11_10 + std::array{0, 10, 20, 30}, // 8 Format10_10_10_2 + std::array{0, 2, 12, 22}, // 9 Format2_10_10_10 + std::array{0, 8, 16, 24}, // 10 Format8_8_8_8 + std::array{0, 32, -1, -1}, // 11 Format32_32 + std::array{0, 16, 32, 48}, // 12 Format16_16_16_16 + std::array{0, 32, 64, -1}, // 13 Format32_32_32 + std::array{0, 32, 64, 96}, // 14 Format32_32_32_32 + std::array{-1, -1, -1, -1}, // 15 + std::array{0, 5, 11, -1}, // 16 Format5_6_5 + 
std::array{0, 1, 6, 11}, // 17 Format1_5_5_5 + std::array{0, 5, 10, 15}, // 18 Format5_5_5_1 + std::array{0, 4, 8, 12}, // 19 Format4_4_4_4 + std::array{0, 8, -1, -1}, // 20 Format8_24 + std::array{0, 24, -1, -1}, // 21 Format24_8 + std::array{0, 24, -1, -1}, // 22 FormatX24_8_32 + std::array{-1, -1, -1, -1}, // 23 + std::array{-1, -1, -1, -1}, // 24 + std::array{-1, -1, -1, -1}, // 25 + std::array{-1, -1, -1, -1}, // 26 + std::array{-1, -1, -1, -1}, // 27 + std::array{-1, -1, -1, -1}, // 28 + std::array{-1, -1, -1, -1}, // 29 + std::array{-1, -1, -1, -1}, // 30 + std::array{-1, -1, -1, -1}, // 31 + std::array{-1, -1, -1, -1}, // 32 FormatGB_GR + std::array{-1, -1, -1, -1}, // 33 FormatBG_RG + std::array{-1, -1, -1, -1}, // 34 Format5_9_9_9 + std::array{-1, -1, -1, -1}, // 35 FormatBc1 + std::array{-1, -1, -1, -1}, // 36 FormatBc2 + std::array{-1, -1, -1, -1}, // 37 FormatBc3 + std::array{-1, -1, -1, -1}, // 38 FormatBc4 + std::array{-1, -1, -1, -1}, // 39 FormatBc5 + std::array{-1, -1, -1, -1}, // 40 FormatBc6 + std::array{-1, -1, -1, -1}, // 41 FormatBc7 +}; + +s32 ComponentOffset(DataFormat format, u32 comp) { + const u32 index = static_cast(format); + if (index >= component_offset.size() || comp >= 4) { + return -1; + } + return component_offset[index][comp]; +} + } // namespace AmdGpu diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 22d102af..2a38c5a0 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -65,6 +65,8 @@ enum class NumberFormat : u32 { int NumComponents(DataFormat format); int NumBits(DataFormat format); +u32 ComponentBits(DataFormat format, u32 comp); +s32 ComponentOffset(DataFormat format, u32 comp); } // namespace AmdGpu diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index ba3de154..1247c025 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -62,14 +62,6 @@ struct Buffer { return stride 
== 0 ? 1U : stride; } - u32 GetStrideElements(u32 element_size) const noexcept { - if (stride == 0) { - return 1U; - } - ASSERT(stride % element_size == 0); - return stride / element_size; - } - u32 GetSize() const noexcept { return GetStride() * num_records; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 74c4ffe8..a59fad08 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -420,6 +420,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Uint) { return vk::Format::eR32G32B32A32Uint; } + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && + num_format == AmdGpu::NumberFormat::Sint) { + return vk::Format::eR32G32B32A32Sint; + } if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR8Sint; } @@ -444,6 +448,12 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::Format16_16 && num_format == AmdGpu::NumberFormat::Uint) { return vk::Format::eR16G16Uint; + if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR8Uint; + } + if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && + num_format == AmdGpu::NumberFormat::SnormNz) { + return vk::Format::eR16G16B16A16Snorm; } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 01901836..a3ba2f77 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -48,7 +48,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul 
boost::container::static_vector bindings; boost::container::static_vector attributes; - const auto& vs_info = stages[0]; + const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; for (const auto& input : vs_info.vs_inputs) { if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { @@ -179,20 +179,21 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .maxDepthBounds = key.depth_bounds_max, }; - u32 shader_count = 1; + u32 shader_count{}; + auto stage = u32(Shader::Stage::Vertex); std::array shader_stages; - shader_stages[0] = vk::PipelineShaderStageCreateInfo{ + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, - .module = modules[0], + .module = modules[stage], .pName = "main", }; - if (modules[4]) { - shader_stages[1] = vk::PipelineShaderStageCreateInfo{ + stage = u32(Shader::Stage::Fragment); + if (modules[stage]) { + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, - .module = modules[4], + .module = modules[stage], .pName = "main", }; - ++shader_count; } const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); @@ -411,7 +412,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& } void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const { - const auto& vs_info = stages[0]; + const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; if (vs_info.vs_inputs.empty()) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ab8be78f..e1564f8f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -77,7 +77,7 @@ public: bool IsEmbeddedVs() const noexcept { static constexpr size_t 
EmbeddedVsHash = 0x9b2da5cf47f8c29f; - return key.stage_hashes[0] == EmbeddedVsHash; + return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; } auto GetWriteMasks() const { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 84eea78c..7f0b74ab 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -256,6 +256,12 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { block_pool.ReleaseContents(); inst_pool.ReleaseContents(); + if (stage != Shader::Stage::Compute && stage != Shader::Stage::Fragment && + stage != Shader::Stage::Vertex) { + LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage); + return {}; + } + // Recompile shader to IR. try { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d0944fcc..fff9bc33 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool}, - vertex_index_buffer{instance, scheduler, VertexIndexFlags, 512_MB, BufferType::Upload} { + vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); }