From 96fb00d41192e3c0a86eabf7d2b5d5a6e5fa2cd5 Mon Sep 17 00:00:00 2001 From: Dzmitry Dubrova Date: Fri, 9 Aug 2024 17:09:51 +0300 Subject: [PATCH 01/23] gui: Implement settings dialog --- CMakeLists.txt | 7 +- src/common/config.cpp | 78 ++++ src/common/config.h | 21 +- src/qt_gui/main_window.cpp | 6 + src/qt_gui/settings_dialog.cpp | 116 ++++++ src/qt_gui/settings_dialog.h | 28 ++ src/qt_gui/settings_dialog.ui | 646 +++++++++++++++++++++++++++++++++ 7 files changed, 900 insertions(+), 2 deletions(-) create mode 100644 src/qt_gui/settings_dialog.cpp create mode 100644 src/qt_gui/settings_dialog.h create mode 100644 src/qt_gui/settings_dialog.ui diff --git a/CMakeLists.txt b/CMakeLists.txt index 4df3db2b..b92dd932 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,6 +103,8 @@ if(ENABLE_QT_GUI) find_package(Qt6 REQUIRED COMPONENTS Widgets Concurrent) qt_standard_project_setup() set(CMAKE_AUTORCC ON) + set(CMAKE_AUTOMOC ON) + set(CMAKE_AUTOUIC ON) endif() set(AUDIO_CORE src/audio_core/sdl_audio.cpp @@ -546,10 +548,13 @@ set(QT_GUI src/qt_gui/elf_viewer.h src/qt_gui/main_window_themes.cpp src/qt_gui/main_window_themes.h + src/qt_gui/settings_dialog.cpp + src/qt_gui/settings_dialog.h + src/qt_gui/settings_dialog.ui src/qt_gui/main.cpp ${EMULATOR} ${RESOURCE_FILES} - ) +) endif() if (ENABLE_QT_GUI) diff --git a/src/common/config.cpp b/src/common/config.cpp index f676ab94..c105650b 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -114,6 +114,66 @@ bool vkValidationSyncEnabled() { return vkValidationSync; } +void setScreenWidth(u32 width) { + screenWidth = width; +} + +void setScreenHeight(u32 height) { + screenHeight = height; +} + +void setDebugDump(bool enable) { + isDebugDump = enable; +} + +void setShowSplash(bool enable) { + isShowSplash = enable; +} + +void setNullGpu(bool enable) { + isNullGpu = enable; +} + +void setDumpShaders(bool enable) { + shouldDumpShaders = enable; +} + +void setDumpPM4(bool enable) { + shouldDumpPM4 = enable; +} + 
+void setVkValidation(bool enable) { + vkValidation = enable; +} + +void setVkSyncValidation(bool enable) { + vkValidationSync = enable; +} + +void setRdocEnabled(bool enable) { + rdocEnable = enable; +} + +void setVblankDiv(u32 value) { + vblankDivider = value; +} + +void setFullscreenMode(bool enable) { + isFullscreen = enable; +} + +void setNeoMode(bool enable) { + isNeo = enable; +} + +void setLogType(std::string type) { + logType = type; +} + +void setLogFilter(std::string type) { + logFilter = type; +} + void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) { main_window_geometry_x = x; main_window_geometry_y = y; @@ -356,4 +416,22 @@ void save(const std::filesystem::path& path) { file << data; file.close(); } + +void setDefaultValues() { + isNeo = false; + isFullscreen = false; + screenWidth = 1280; + screenHeight = 720; + logFilter = ""; + logType = "async"; + isDebugDump = false; + isShowSplash = false; + isNullGpu = false; + shouldDumpShaders = false; + shouldDumpPM4 = false; + vblankDivider = 1; + vkValidation = false; + rdocEnable = false; +} + } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index 53c88ec9..6174b1e1 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -29,6 +29,24 @@ bool dumpPM4(); bool isRdocEnabled(); u32 vblankDiv(); +void setDebugDump(bool enable); +void setShowSplash(bool enable); +void setNullGpu(bool enable); +void setDumpShaders(bool enable); +void setDumpPM4(bool enable); +void setVblankDiv(u32 value); +void setScreenWidth(u32 width); +void setScreenHeight(u32 height); +void setFullscreenMode(bool enable); +void setNeoMode(bool enable); + +void setLogType(std::string type); +void setLogFilter(std::string type); + +void setVkValidation(bool enable); +void setVkSyncValidation(bool enable); +void setRdocEnabled(bool enable); + bool vkValidationEnabled(); bool vkValidationSyncEnabled(); @@ -64,7 +82,8 @@ std::vector getPkgViewer(); std::vector getElfViewer(); std::vector getRecentFiles(); 
+void setDefaultValues(); + // settings u32 GetLanguage(); - }; // namespace Config diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 646433ee..55bd5640 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -15,6 +15,7 @@ #include "core/loader.h" #include "game_install_dialog.h" #include "main_window.h" +#include "settings_dialog.h" MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWindow) { ui->setupUi(this); @@ -185,6 +186,11 @@ void MainWindow::CreateConnects() { connect(m_game_list_frame.get(), &QTableWidget::cellDoubleClicked, this, &MainWindow::StartGame); + connect(ui->settingsButton, &QPushButton::clicked, this, [this]() { + auto settingsDialog = new SettingsDialog(this); + settingsDialog->exec(); + }); + connect(ui->setIconSizeTinyAct, &QAction::triggered, this, [this]() { if (isTableList) { m_game_list_frame->icon_size = diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp new file mode 100644 index 00000000..88c91ef6 --- /dev/null +++ b/src/qt_gui/settings_dialog.cpp @@ -0,0 +1,116 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "settings_dialog.h" +#include "ui_settings_dialog.h" + +SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::SettingsDialog) { + ui->setupUi(this); + ui->tabWidgetSettings->setUsesScrollButtons(false); + const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + + ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); + + LoadValuesFromConfig(); + + connect(ui->buttonBox, &QDialogButtonBox::rejected, this, &QWidget::close); + + connect(ui->buttonBox, &QDialogButtonBox::clicked, this, + [this, config_dir](QAbstractButton* button) { + if (button == ui->buttonBox->button(QDialogButtonBox::Save)) { + Config::save(config_dir / "config.toml"); + QWidget::close(); + } else if (button == 
ui->buttonBox->button(QDialogButtonBox::Apply)) { + Config::save(config_dir / "config.toml"); + } else if (button == ui->buttonBox->button(QDialogButtonBox::RestoreDefaults)) { + Config::setDefaultValues(); + LoadValuesFromConfig(); + } + }); + + connect(ui->tabWidgetSettings, &QTabWidget::currentChanged, this, [this]() { + ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); + }); + + // GPU TAB + { + // TODO: Implement graphics device changing + + connect(ui->widthSpinBox, &QSpinBox::valueChanged, this, + [](int val) { Config::setScreenWidth(val); }); + + connect(ui->heightSpinBox, &QSpinBox::valueChanged, this, + [](int val) { Config::setScreenHeight(val); }); + + connect(ui->vblankSpinBox, &QSpinBox::valueChanged, this, + [](int val) { Config::setVblankDiv(val); }); + + connect(ui->dumpShadersCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setDumpShaders(val); }); + + connect(ui->nullGpuCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setNullGpu(val); }); + + connect(ui->dumpPM4CheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setDumpPM4(val); }); + } + + // GENERAL TAB + { + connect(ui->fullscreenCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setFullscreenMode(val); }); + + connect(ui->showSplashCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setShowSplash(val); }); + + connect(ui->ps4proCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setNeoMode(val); }); + + connect(ui->logTypeComboBox, &QComboBox::currentTextChanged, this, + [](const QString& text) { Config::setLogType(text.toStdString()); }); + + connect(ui->logFilterLineEdit, &QLineEdit::textChanged, this, + [](const QString& text) { Config::setLogFilter(text.toStdString()); }); + } + + // DEBUG TAB + { + connect(ui->debugDump, &QCheckBox::stateChanged, this, + [](int val) { Config::setDebugDump(val); }); + + connect(ui->vkValidationCheckBox, &QCheckBox::stateChanged, this, + 
[](int val) { Config::setVkValidation(val); }); + + connect(ui->vkSyncValidationCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setVkSyncValidation(val); }); + + connect(ui->rdocCheckBox, &QCheckBox::stateChanged, this, + [](int val) { Config::setRdocEnabled(val); }); + } +} + +void SettingsDialog::LoadValuesFromConfig() { + ui->widthSpinBox->setValue(Config::getScreenWidth()); + ui->heightSpinBox->setValue(Config::getScreenHeight()); + ui->vblankSpinBox->setValue(Config::vblankDiv()); + ui->dumpShadersCheckBox->setChecked(Config::dumpShaders()); + ui->nullGpuCheckBox->setChecked(Config::nullGpu()); + ui->dumpPM4CheckBox->setChecked(Config::dumpPM4()); + + ui->fullscreenCheckBox->setChecked(Config::isFullscreenMode()); + ui->showSplashCheckBox->setChecked(Config::showSplash()); + ui->ps4proCheckBox->setChecked(Config::isNeoMode()); + ui->logTypeComboBox->setCurrentText(QString::fromStdString(Config::getLogType())); + ui->logFilterLineEdit->setText(QString::fromStdString(Config::getLogFilter())); + + ui->debugDump->setChecked(Config::debugDump()); + ui->vkValidationCheckBox->setChecked(Config::vkValidationEnabled()); + ui->vkSyncValidationCheckBox->setChecked(Config::vkValidationSyncEnabled()); + ui->rdocCheckBox->setChecked(Config::isRdocEnabled()); +} + +int SettingsDialog::exec() { + return QDialog::exec(); +} + +SettingsDialog::~SettingsDialog() {} \ No newline at end of file diff --git a/src/qt_gui/settings_dialog.h b/src/qt_gui/settings_dialog.h new file mode 100644 index 00000000..2bffa795 --- /dev/null +++ b/src/qt_gui/settings_dialog.h @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "common/config.h" +#include "common/path_util.h" + +namespace Ui { +class SettingsDialog; +} + +class SettingsDialog : public QDialog { + Q_OBJECT +public: + explicit SettingsDialog(QWidget* parent = nullptr); + ~SettingsDialog(); 
+ + int exec() override; + +private: + void LoadValuesFromConfig(); + + std::unique_ptr ui; +}; diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui new file mode 100644 index 00000000..507980eb --- /dev/null +++ b/src/qt_gui/settings_dialog.ui @@ -0,0 +1,646 @@ + + + SettingsDialog + + + Qt::WindowModality::WindowModal + + + + 0 + 0 + 1024 + 768 + + + + + 0 + 0 + + + + Settings + + + + :/images/shadps4.ico:/images/shadps4.ico + + + + + + QFrame::Shape::NoFrame + + + true + + + + true + + + + 0 + 0 + 1006 + 720 + + + + + 0 + 0 + + + + 1 + + + + GPU + + + + + + + + + + Graphics Device + + + + + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + 6 + + + 0 + + + + + + + Width + + + + + + true + + + QAbstractSpinBox::CorrectionMode::CorrectToNearestValue + + + false + + + 0 + + + 9999 + + + 1280 + + + + + + + + + + Height + + + + + + true + + + true + + + QAbstractSpinBox::CorrectionMode::CorrectToNearestValue + + + false + + + 0 + + + 9999 + + + 720 + + + + + + + + + + + + + + 6 + + + 0 + + + + + + + Vblank Divider + + + + + + true + + + true + + + QAbstractSpinBox::CorrectionMode::CorrectToNearestValue + + + false + + + 1 + + + 9999 + + + 1 + + + + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + 12 + + + 12 + + + + + Additional Settings + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignVCenter + + + + + + Enable Shaders Dumping + + + + + + + Enable NULL GPU + + + + + + + Enable PM4 Dumping + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + General + + + + + + + + + + Emulator Settings + + + + + + 
Enable Fullscreen + + + + + + + Show Splash + + + + + + + Is PS4 Pro + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + + + + Logger Settings + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Log Type + + + + + + + async + + + + + sync + + + + + + + + + + + + + + 6 + + + 0 + + + + + + + Log Filter + + + + + + + + + + + + + + + + + + + + + + + Additional Settings + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + Debug + + + + + + + + true + + + General + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + + + + Enable Debug Dumping + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + Enable Vulkan Validation Layers + + + + + + + Enable Vulkan Synchronization Validation + + + + + + + Enable RenderDoc Debugging + + + + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + + QDialogButtonBox::StandardButton::Apply|QDialogButtonBox::StandardButton::Close|QDialogButtonBox::StandardButton::RestoreDefaults|QDialogButtonBox::StandardButton::Save + + + + + + + + From 3163cd135bcbedc29bb284ce9a624f6097bf2d6e Mon Sep 17 00:00:00 2001 From: Dzmitry Dubrova Date: Fri, 9 Aug 2024 18:19:35 +0300 Subject: [PATCH 02/23] gui: Add console language to settings --- src/common/config.cpp | 5 + src/common/config.h | 1 + src/qt_gui/settings_dialog.cpp | 8 + src/qt_gui/settings_dialog.ui | 288 +++++++++++++++++++++++++++++++-- 4 files changed, 289 insertions(+), 13 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index c105650b..ebdd9c32 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -162,6 +162,10 @@ void setFullscreenMode(bool 
enable) { isFullscreen = enable; } +void setLanguage(u32 language) { + m_language = language; +} + void setNeoMode(bool enable) { isNeo = enable; } @@ -432,6 +436,7 @@ void setDefaultValues() { vblankDivider = 1; vkValidation = false; rdocEnable = false; + m_language = 1; } } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index 6174b1e1..ad0aad22 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -38,6 +38,7 @@ void setVblankDiv(u32 value); void setScreenWidth(u32 width); void setScreenHeight(u32 height); void setFullscreenMode(bool enable); +void setLanguage(u32 language); void setNeoMode(bool enable); void setLogType(std::string type); diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 88c91ef6..722abe7e 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -32,6 +32,12 @@ SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::Se ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); }); + // EMULATOR TAB + { + connect(ui->consoleLanguageComboBox, &QComboBox::currentIndexChanged, this, + [](int index) { Config::setLanguage(index); }); + } + // GPU TAB { // TODO: Implement graphics device changing @@ -90,6 +96,8 @@ SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::Se } void SettingsDialog::LoadValuesFromConfig() { + ui->consoleLanguageComboBox->setCurrentIndex(Config::GetLanguage()); + ui->widthSpinBox->setValue(Config::getScreenWidth()); ui->heightSpinBox->setValue(Config::getScreenHeight()); ui->vblankSpinBox->setValue(Config::vblankDiv()); diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui index 507980eb..4893bd61 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -1,4 +1,7 @@ + + SettingsDialog @@ -54,31 +57,182 @@ - 1 + 0 - + - GPU + Emulator - + - + - + - + - Graphics Device + Console Language - + - + + + + Japanese + + + + + English 
(United States) + + + + + French (France) + + + + + Spanish (Spain) + + + + + German + + + + + Italian + + + + + Dutch + + + + + Portuguese (Portugal) + + + + + Russian + + + + + Korean + + + + + Traditional Chinese + + + + + Simplified Chinese + + + + + Finnish + + + + + Swedish + + + + + Danish + + + + + Norwegian + + + + + Polish + + + + + Portuguese (Brazil) + + + + + English (United Kingdom) + + + + + Turkish + + + + + Spanish (Latin America) + + + + + Arabic + + + + + French (Canada) + + + + + Czech + + + + + Hungarian + + + + + Greek + + + + + Romanian + + + + + Thai + + + + + Vietnamese + + + + + Indonesian + + + - + 0 @@ -96,7 +250,7 @@ - + 0 @@ -113,6 +267,114 @@ + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + + + + 12 + + + 12 + + + + + + + + + Qt::Orientation::Vertical + + + QSizePolicy::Policy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + GPU + + + + + + + + + + Graphics Device + + + + + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + @@ -294,7 +556,7 @@ Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignVCenter - + From 834e3a500e62d58f1fe94fb509dff0eaefd06b27 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Sun, 11 Aug 2024 13:16:50 +0300 Subject: [PATCH 03/23] added a fix for audio (seems that some games calls sceAudioOutInit twice) Thanks Roamic for tracing this! 
--- src/core/libraries/audio/audioout.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index cc7ce342..eac3845f 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -235,6 +235,9 @@ int PS4_SYSV_ABI sceAudioOutGetSystemState() { } int PS4_SYSV_ABI sceAudioOutInit() { + if (audio != nullptr) { + return ORBIS_AUDIO_OUT_ERROR_ALREADY_INIT; + } audio = std::make_unique(); LOG_INFO(Lib_AudioOut, "called"); return ORBIS_OK; From 3e2d4d6b793fd14e918673fbacd40914a0714ea0 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Mon, 12 Aug 2024 13:29:57 +0200 Subject: [PATCH 04/23] Gnmdriver: More functions (#410) * libraries: gnmdriver: added `sceGnmGetGpuCoreClockFrequency` * libraries: gnmdriver: `sceGnmSetVgtControl` added * amdgpu: gpuclock64 in write eop packet --- src/core/libraries/gnmdriver/gnmdriver.cpp | 20 +++++++++++++++----- src/core/libraries/gnmdriver/gnmdriver.h | 5 +++-- src/video_core/amdgpu/pm4_cmds.h | 18 +++++++++++------- src/video_core/amdgpu/pm4_opcodes.h | 1 + 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 650252f9..c2ee6d59 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -956,9 +956,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { - LOG_DEBUG(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { + LOG_TRACE(Lib_GnmDriver, "called"); + return Config::isNeoMode() ? 
911'000'000 : 800'000'000; } int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() { @@ -1706,8 +1706,18 @@ int PS4_SYSV_ABI sceGnmSetupMipStatsReport() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetVgtControl() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size != 3 || (prim_group_sz_minus_one >= 0x100) || + ((wd_switch_only_on_eop_mode | partial_vs_wave_mode) >= 2)) { + return -1; + } + + const u32 reg_value = + ((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu); + PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 8100b116..84872297 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp(); int PS4_SYSV_ABI sceGnmGetEqEventType(); int PS4_SYSV_ABI sceGnmGetEqTimeStamp(); int PS4_SYSV_ABI sceGnmGetGpuBlockStatus(); -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); int PS4_SYSV_ABI sceGnmGetGpuInfoStatus(); int PS4_SYSV_ABI sceGnmGetLastWaitedAddress(); int PS4_SYSV_ABI sceGnmGetNumTcaUnits(); @@ -161,7 +161,8 @@ int PS4_SYSV_ABI sceGnmSetResourceUserData(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance(); int PS4_SYSV_ABI sceGnmSetupMipStatsReport(); -int PS4_SYSV_ABI sceGnmSetVgtControl(); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode); s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier); int PS4_SYSV_ABI 
sceGnmSetWaveLimitMultiplier(); int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers(); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index e5f618cc..5ab233fd 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -282,6 +282,13 @@ enum class InterruptSelect : u32 { IrqUndocumented = 3, }; +static u64 GetGpuClock64() { + auto now = std::chrono::high_resolution_clock::now(); + auto duration = now.time_since_epoch(); + auto ticks = std::chrono::duration_cast(duration).count(); + return static_cast(ticks); +} + struct PM4CmdEventWriteEop { PM4Type3Header header; union { @@ -325,6 +332,10 @@ struct PM4CmdEventWriteEop { *Address() = DataQWord(); break; } + case DataSelect::GpuClock64: { + *Address() = GetGpuClock64(); + break; + } case DataSelect::PerfCounter: { *Address() = Common::FencedRDTSC(); break; @@ -652,13 +663,6 @@ struct PM4CmdReleaseMem { return data_lo | u64(data_hi) << 32; } - uint64_t GetGpuClock64() const { - auto now = std::chrono::high_resolution_clock::now(); - auto duration = now.time_since_epoch(); - auto ticks = std::chrono::duration_cast(duration).count(); - return static_cast(ticks); - } - void SignalFence(Platform::InterruptId irq_id) const { switch (data_sel.Value()) { case DataSelect::Data32Low: { diff --git a/src/video_core/amdgpu/pm4_opcodes.h b/src/video_core/amdgpu/pm4_opcodes.h index 8922c4ea..fba0cbb9 100644 --- a/src/video_core/amdgpu/pm4_opcodes.h +++ b/src/video_core/amdgpu/pm4_opcodes.h @@ -41,6 +41,7 @@ enum class PM4ItOpcode : u32 { CondIndirectBuffer = 0x3F, CopyData = 0x40, CommandProcessorDma = 0x41, + PfpSyncMe = 0x42, SurfaceSync = 0x43, CondWrite = 0x45, EventWrite = 0x46, From ace39957efa1f8b65902b7b197a9e8983dc99334 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Mon, 12 Aug 2024 13:46:45 +0200 Subject: [PATCH 05/23] Video Core: debug tools (#412) * video_core: better use of rdoc markers * renderer_vulkan: added gpu 
assisted validation * renderer_vulkan: make nv_checkpoints operational * video_core: unified Vulkan objects names --- src/common/config.cpp | 14 ++++ src/common/config.h | 2 + src/video_core/amdgpu/liverpool.cpp | 36 +++++++--- src/video_core/buffer_cache/buffer.cpp | 12 ++-- .../renderer_vulkan/vk_instance.cpp | 11 ++++ src/video_core/renderer_vulkan/vk_instance.h | 5 ++ .../renderer_vulkan/vk_platform.cpp | 65 ++++++++++++++++--- .../renderer_vulkan/vk_rasterizer.cpp | 30 ++++++++- .../renderer_vulkan/vk_rasterizer.h | 4 +- .../renderer_vulkan/vk_scheduler.cpp | 7 ++ src/video_core/texture_cache/image.cpp | 4 ++ 11 files changed, 161 insertions(+), 29 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index ebdd9c32..3cf9af15 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -25,7 +25,9 @@ static bool shouldDumpPM4 = false; static u32 vblankDivider = 1; static bool vkValidation = false; static bool vkValidationSync = false; +static bool vkValidationGpu = false; static bool rdocEnable = false; +static bool rdocMarkersEnable = false; // Gui std::string settings_install_dir = ""; u32 main_window_geometry_x = 400; @@ -102,6 +104,10 @@ bool isRdocEnabled() { return rdocEnable; } +bool isMarkersEnabled() { + return rdocMarkersEnable; +} + u32 vblankDiv() { return vblankDivider; } @@ -114,6 +120,10 @@ bool vkValidationSyncEnabled() { return vkValidationSync; } +bool vkValidationGpuEnabled() { + return vkValidationGpu; +} + void setScreenWidth(u32 width) { screenWidth = width; } @@ -319,7 +329,9 @@ void load(const std::filesystem::path& path) { gpuId = toml::find_or(vk, "gpuId", -1); vkValidation = toml::find_or(vk, "validation", false); vkValidationSync = toml::find_or(vk, "validation_sync", false); + vkValidationGpu = toml::find_or(vk, "validation_gpu", true); rdocEnable = toml::find_or(vk, "rdocEnable", false); + rdocMarkersEnable = toml::find_or(vk, "rdocMarkersEnable", false); } if (data.contains("Debug")) { @@ -394,7 +406,9 @@ 
void save(const std::filesystem::path& path) { data["Vulkan"]["gpuId"] = gpuId; data["Vulkan"]["validation"] = vkValidation; data["Vulkan"]["validation_sync"] = vkValidationSync; + data["Vulkan"]["validation_gpu"] = vkValidationGpu; data["Vulkan"]["rdocEnable"] = rdocEnable; + data["Vulkan"]["rdocMarkersEnable"] = rdocMarkersEnable; data["Debug"]["DebugDump"] = isDebugDump; data["LLE"]["libc"] = isLibc; data["GUI"]["theme"] = mw_themes; diff --git a/src/common/config.h b/src/common/config.h index ad0aad22..37ace79c 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -27,6 +27,7 @@ bool nullGpu(); bool dumpShaders(); bool dumpPM4(); bool isRdocEnabled(); +bool isMarkersEnabled(); u32 vblankDiv(); void setDebugDump(bool enable); @@ -50,6 +51,7 @@ void setRdocEnabled(bool enable); bool vkValidationEnabled(); bool vkValidationSyncEnabled(); +bool vkValidationGpuEnabled(); // Gui void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index af1963ee..bd32b5b9 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -180,6 +180,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanSignal(Platform::InterruptId::GfxFlip); break; } + case PM4CmdNop::PayloadType::DebugMarkerPush: { + const auto marker_sz = nop->header.count.Value() * 2; + const std::string_view label{reinterpret_cast(&nop->data_block[1]), + marker_sz}; + rasterizer->ScopeMarkerBegin(label); + break; + } + case PM4CmdNop::PayloadType::DebugMarkerPop: { + rasterizer->ScopeMarkerEnd(); + break; + } default: break; } @@ -295,8 +306,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndex2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + 
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true); rasterizer->ScopeMarkerEnd(); } @@ -308,8 +320,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index_off->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin(fmt::format( - "dcb:{}:DrawIndexOffset2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true, draw_index_off->index_offset); rasterizer->ScopeMarkerEnd(); } @@ -320,8 +333,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(false); rasterizer->ScopeMarkerEnd(); } @@ -334,8 +348,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:Dispatch", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } @@ -486,8 +501,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { regs.cs_program.dim_z = dispatch_direct->dim_z; regs.cs_program.dispatch_initiator = 
dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin(fmt::format( - "acb[{}]:{}:Dispatch", vqid, reinterpret_cast(acb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index e9498b35..d112864d 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -106,10 +106,8 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_ VmaAllocationInfo alloc_info{}; buffer.Create(buffer_ci, usage, &alloc_info); - if (instance->HasDebuggingToolAttached()) { - const auto device = instance->GetDevice(); - Vulkan::SetObjectName(device, Handle(), "Buffer {:#x} {} KiB", cpu_addr, size_bytes / 1024); - } + const auto device = instance->GetDevice(); + Vulkan::SetObjectName(device, Handle(), "Buffer {:#x}:{:#x}", cpu_addr, size_bytes); // Map it if it is host visible. 
VkMemoryPropertyFlags property_flags{}; @@ -152,10 +150,8 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); const auto device = instance.GetDevice(); - if (instance.HasDebuggingToolAttached()) { - Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}): {} KiB", BufferTypeName(usage), - size_bytes / 1024); - } + Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}):{:#x}", BufferTypeName(usage), + size_bytes); } std::pair StreamBuffer::Map(u64 size, u64 alignment) { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 2d396daf..eedba4c8 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -8,6 +8,7 @@ #include #include "common/assert.h" +#include "common/config.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -213,6 +214,13 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); + + if (has_sync2) { + has_nv_checkpoints = Config::isMarkersEnabled() + ? 
add_extension(VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME) + : false; + } const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -308,6 +316,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceRobustness2FeaturesEXT{ .nullDescriptor = true, }, + vk::PhysicalDeviceSynchronization2Features{ + .synchronization2 = has_sync2, + }, }; if (!color_write_en) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index a8c0dcf4..2f2397d6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -88,6 +88,10 @@ public: return profiler_context; } + bool HasNvCheckpoints() const { + return has_nv_checkpoints; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -259,6 +263,7 @@ private: bool debug_utils_supported{}; bool has_nsight_graphics{}; bool has_renderdoc{}; + bool has_nv_checkpoints{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 0915514b..33113c58 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -221,12 +221,61 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT vk::Bool32 enable_sync = enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - vk::LayerSettingEXT layer_set = { - .pLayerName = VALIDATION_LAYER_NAME, - .pSettingName = "validate_sync", - .type = vk::LayerSettingTypeEXT::eBool32, - .valueCount = 1, - .pValues = &enable_sync, + vk::Bool32 enable_gpuav = + enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; + const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled() + ? 
"GPU_BASED_GPU_ASSISTED" + : "GPU_BASED_NONE"; + const std::array layer_setings = { + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_sync", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "sync_queue_submit", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_gpu_based", + .type = vk::LayerSettingTypeEXT::eString, + .valueCount = 1, + .pValues = &gpuav_mode, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_reserve_binding_slot", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_descriptor_checks", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_validate_indirect_buffer", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_buffer_copies", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, }; vk::StructureChain instance_ci_chain = { @@ -238,8 +287,8 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT .ppEnabledExtensionNames = extensions.data(), }, vk::LayerSettingsCreateInfoEXT{ - .settingCount = 1, - .pSettings = &layer_set, + .settingCount = layer_setings.size(), + .pSettings = layer_setings.data(), }, }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 51de09f7..b6e43a1a 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -230,16 +230,42 @@ void Rasterizer::UpdateDepthStencilState() { cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); } -void Rasterizer::ScopeMarkerBegin(const std::string& str) { +void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ - .pLabelName = str.c_str(), + .pLabelName = str.data(), }); } void Rasterizer::ScopeMarkerEnd() { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.endDebugUtilsLabelEXT(); } +void Rasterizer::ScopedMarkerInsert(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.insertDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ + .pLabelName = str.data(), + }); +} + +void Rasterizer::Breadcrumb(u64 id) { + if (!instance.HasNvCheckpoints()) { + return; + } + scheduler.CommandBuffer().setCheckpointNV(id); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 685ba6e0..a151ebc2 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -35,8 +35,10 @@ public: void DispatchDirect(); - void ScopeMarkerBegin(const std::string& str); + void ScopeMarkerBegin(const std::string_view& str); void ScopeMarkerEnd(); + void ScopedMarkerInsert(const std::string_view& str); + void Breadcrumb(u64 id); void InvalidateMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fb64285f..c74f3d07 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ 
b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -158,6 +158,13 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { try { instance.GetGraphicsQueue().submit(submit_info, info.fence); } catch (vk::DeviceLostError& err) { + if (instance.HasNvCheckpoints()) { + const auto checkpoint_data = instance.GetGraphicsQueue().getCheckpointData2NV(); + for (const auto& cp : checkpoint_data) { + LOG_CRITICAL(Render_Vulkan, "{}: {:#x}", vk::to_string(cp.stage), + reinterpret_cast(cp.pCheckpointMarker)); + } + } UNREACHABLE_MSG("Device lost during submit: {}", err.what()); } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f7aef847..f1148760 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -154,6 +155,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, }; image.Create(image_ci); + + Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}", + info.guest_address, info.guest_size_bytes); } void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, From 3d0fdf11f0781ac8b84f5e501d8837137a45e387 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:23:01 +0200 Subject: [PATCH 06/23] Build stabilization (#413) * shader_recompiler: fix for float convert and debug asserts * libraries: kernel: correct return code on invalid semaphore * amdgpu: additional case for cb extents retrieval heuristic * removed redundant check in assert * amdgpu: fix for linear tiling mode detection fin color buffers * texture_cache: fix for unexpected scheduler flushes by detiler * renderer_vulkan: missing depth 
barrier * texture_cache: missed slices in rt view; + detiler format --- .../libraries/kernel/threads/semaphore.cpp | 12 +++++++ .../spirv/emit_spirv_context_get_set.cpp | 9 +---- .../backend/spirv/emit_spirv_image.cpp | 4 +-- .../backend/spirv/emit_spirv_instructions.h | 2 +- .../ir/passes/resource_tracking_pass.cpp | 6 ++-- src/shader_recompiler/runtime_info.h | 2 +- src/video_core/amdgpu/liverpool.cpp | 2 +- src/video_core/amdgpu/liverpool.h | 2 +- src/video_core/buffer_cache/buffer.h | 4 +++ .../renderer_vulkan/vk_rasterizer.cpp | 3 +- .../renderer_vulkan/vk_scheduler.cpp | 36 ++++++++++++++++--- src/video_core/renderer_vulkan/vk_scheduler.h | 3 +- src/video_core/texture_cache/image_info.cpp | 4 ++- src/video_core/texture_cache/image_view.cpp | 2 ++ src/video_core/texture_cache/tile_manager.cpp | 3 +- 15 files changed, 69 insertions(+), 25 deletions(-) diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index 370dba44..5441c641 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -174,10 +174,16 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3 } s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(true, needCount, pTimeout); } s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } if (!sem->Signal(signalCount)) { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -185,10 +191,16 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { } s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(false, needCount, nullptr); } int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) { + if (!sem) { + 
return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Cancel(setCount, pNumWaitThreads); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e85272e9..bd34ed3d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -386,19 +386,12 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com if (is_signed) { value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertSToF(ctx.F32[1], value); } else { value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertUToF(ctx.F32[1], value); - } - } else { - if (is_signed) { - value = ctx.OpConvertSToF(ctx.F32[1], value); - } else { - value = ctx.OpConvertUToF(ctx.F32[1], value); } } + value = ctx.OpBitcast(ctx.F32[1], value); return ConvertValue(ctx, value, num_format, bit_width); } break; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 030d3948..e2d2c1ae 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -33,14 +33,14 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c operands.operands); } -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); return 
ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, - spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + spv::ImageOperandsMask::Lod, lod); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 51899eb4..0b2020f1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -359,7 +359,7 @@ Id EmitConvertU32U16(EmitContext& ctx, Id value); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id bias_lc, const IR::Value& offset); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 97438f80..6c96faa3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -376,9 +376,11 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, return -1; } // We have found this pattern. Build the sharp. - std::array buffer; + std::array buffer; buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32(); - buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32; + buffer[1] = 0; + buffer[2] = handle->Arg(2).U32(); + buffer[3] = handle->Arg(3).U32(); cbuf = std::bit_cast(buffer); // Assign a binding to this sharp. 
return descriptors.Add(BufferResource{ diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4ab71c3b..b936e06a 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -116,7 +116,7 @@ struct PushData { std::array buf_offsets; void AddOffset(u32 binding, u32 offset) { - ASSERT(offset < 64 && binding < 32); + ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; } }; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index bd32b5b9..517f9d53 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -237,7 +237,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spantype3.count; - if (nop_offset == 0x0e || nop_offset == 0x0d) { + if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { ASSERT_MSG(payload[nop_offset] == 0xc0001000, "NOP hint is missing in CB setup sequence"); last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 3ebd9a97..e28b5680 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -766,7 +766,7 @@ struct Liverpool { } TilingMode GetTilingMode() const { - return attrib.tile_mode_index; + return info.linear_general ? 
TilingMode::Display_Linear : attrib.tile_mode_index; } bool IsTiled() const { diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index e0d9da08..d373fbff 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -146,6 +146,10 @@ public: return offset; } + u64 GetFreeSize() const { + return size_bytes - offset - mapped_size; + } + private: struct Watch { u64 tick{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b6e43a1a..542624a0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -152,7 +152,8 @@ void Rasterizer::BeginRendering() { .stencil = regs.stencil_clear}}, }; texture_cache.TouchMeta(htile_address, false); - state.num_depth_attachments++; + state.has_depth = true; + state.has_stencil = image.info.usage.stencil; } scheduler.BeginRendering(state); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c74f3d07..a6c2536b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -38,8 +38,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { .layerCount = 1, .colorAttachmentCount = render_state.num_color_attachments, .pColorAttachments = render_state.color_attachments.data(), - .pDepthAttachment = - render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr, + .pDepthAttachment = render_state.has_depth ? 
&render_state.depth_attachment : nullptr, }; current_cmdbuf.beginRendering(rendering_info); @@ -50,6 +49,8 @@ void Scheduler::EndRendering() { return; } is_rendering = false; + current_cmdbuf.endRendering(); + boost::container::static_vector barriers; for (size_t i = 0; i < render_state.num_color_attachments; ++i) { barriers.push_back(vk::ImageMemoryBarrier{ @@ -70,10 +71,35 @@ void Scheduler::EndRendering() { }, }); } - current_cmdbuf.endRendering(); + if (render_state.has_depth) { + barriers.push_back(vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, + .oldLayout = render_state.depth_attachment.imageLayout, + .newLayout = render_state.depth_attachment.imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = render_state.depth_image, + .subresourceRange = + { + .aspectMask = vk::ImageAspectFlagBits::eDepth | + (render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil + : vk::ImageAspectFlagBits::eNone), + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + } + if (!barriers.empty()) { - current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eFragmentShader, + const auto src_stages = + vk::PipelineStageFlagBits::eColorAttachmentOutput | + (render_state.has_depth ? 
vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eEarlyFragmentTests + : vk::PipelineStageFlagBits::eNone); + current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader, vk::DependencyFlagBits::eByRegion, {}, {}, barriers); } } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index b82d558c..1140bfbc 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -20,7 +20,8 @@ struct RenderState { vk::RenderingAttachmentInfo depth_attachment{}; vk::Image depth_image{}; u32 num_color_attachments{}; - u32 num_depth_attachments{}; + bool has_depth{}; + bool has_stencil{}; u32 width = std::numeric_limits::max(); u32 height = std::numeric_limits::max(); diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 94917be0..17b78a6d 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -189,6 +189,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? 
htile_address : 0; usage.depth_target = true; + usage.stencil = + buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); @@ -260,7 +262,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { case AmdGpu::TilingMode::Display_MacroTiled: case AmdGpu::TilingMode::Texture_MacroTiled: case AmdGpu::TilingMode::Depth_MacroTiled: { - // ASSERT(!props.is_cube && !props.is_block); + ASSERT(!props.is_block); ASSERT(num_samples == 1); std::tie(mip_info.pitch, mip_info.size) = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index ef6163c4..cbf77f2d 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -92,6 +92,8 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept { const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); + range.base.layer = col_buffer.view.slice_start; + range.extent.layers = col_buffer.NumSlices(); format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index d3a7d796..75fa378c 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -194,6 +194,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR32G32Sfloat: case vk::Format::eR32G32Uint: case vk::Format::eR16G16B16A16Unorm: + case vk::Format::eR16G16B16A16Sfloat: return vk::Format::eR32G32Uint; case vk::Format::eBc2SrgbBlock: case vk::Format::eBc2UnormBlock: @@ -397,7 +398,7 @@ std::optional TileManager::TryDetile(Image& image) 
{ const u32 image_size = image.info.guest_size_bytes; const auto [in_buffer, in_offset] = [&] -> std::pair { // Use stream buffer for smaller textures. - if (image_size <= StreamBufferSize) { + if (image_size <= stream_buffer.GetFreeSize()) { u32 offset = stream_buffer.Copy(image.info.guest_address, image_size); return {stream_buffer.Handle(), offset}; } From 2ba3221fc94df83d79103b957fd8888296c4f305 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 12 Aug 2024 20:10:42 +0200 Subject: [PATCH 07/23] fix for Linux compilation (#416) --- src/video_core/amdgpu/liverpool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index e28b5680..779e5536 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -766,7 +766,8 @@ struct Liverpool { } TilingMode GetTilingMode() const { - return info.linear_general ? TilingMode::Display_Linear : attrib.tile_mode_index; + return info.linear_general ? 
TilingMode::Display_Linear + : attrib.tile_mode_index.Value(); } bool IsTiled() const { From a15a93997cbd4bcf14008926bcaf0db3994137ed Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 12 Aug 2024 22:52:21 +0200 Subject: [PATCH 08/23] unlink sync2 if not present (tentative fix for #418) --- src/video_core/renderer_vulkan/vk_instance.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index eedba4c8..a54f2e0c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -317,7 +317,7 @@ bool Instance::CreateDevice() { .nullDescriptor = true, }, vk::PhysicalDeviceSynchronization2Features{ - .synchronization2 = has_sync2, + .synchronization2 = true, }, }; @@ -328,12 +328,18 @@ bool Instance::CreateDevice() { if (!robustness) { device_chain.unlink(); } + if (!has_sync2) { + device_chain.unlink(); + } try { device = physical_device.createDeviceUnique(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what()); return false; + } catch (vk::FeatureNotPresentError& err) { + LOG_CRITICAL(Render_Vulkan, "Some required features are not available {}", err.what()); + return false; } VULKAN_HPP_DEFAULT_DISPATCHER.init(*device); From 284035d3e2089beb5a353101b919b3a0fbb410ce Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 12 Aug 2024 14:41:26 -0700 Subject: [PATCH 09/23] Enable VK_EXT_robustness2 nullDescriptor only if supported. 
--- src/video_core/renderer_vulkan/vk_instance.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index a54f2e0c..5beb57c4 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -164,7 +164,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features, vk::PhysicalDeviceVulkan13Features, vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR, - vk::PhysicalDeviceDepthClipControlFeaturesEXT>(); + vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT>(); const vk::StructureChain properties_chain = physical_device.getProperties2< vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>(); @@ -325,7 +325,10 @@ bool Instance::CreateDevice() { device_chain.unlink(); device_chain.unlink(); } - if (!robustness) { + if (robustness) { + device_chain.get().nullDescriptor = + feature_chain.get().nullDescriptor; + } else { device_chain.unlink(); } if (!has_sync2) { From 18f179928007c99dd2f5a739cf04a7b49dbab45f Mon Sep 17 00:00:00 2001 From: Borchev <4501931+Borchev@users.noreply.github.com> Date: Mon, 12 Aug 2024 23:05:30 -0700 Subject: [PATCH 10/23] Add partial unmap support (#322) * Add partial unmap support * undo accidental whitespace removal * Fix assertions * Adjust Reserve and Free functions for partial unmapping --- src/core/address_space.cpp | 24 ++++++++++++++++++++++-- src/core/address_space.h | 3 ++- src/core/memory.cpp | 37 +++++++++++++++++++++---------------- src/core/memory.h | 3 ++- 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index c3e0d77a..91c8b7a1 100644 --- a/src/core/address_space.cpp +++ 
b/src/core/address_space.cpp @@ -454,8 +454,28 @@ void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 #endif } -void AddressSpace::Unmap(VAddr virtual_addr, size_t size, bool has_backing) { - return impl->Unmap(virtual_addr, size, has_backing); +void AddressSpace::Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma, + PAddr phys_base, bool is_exec, bool has_backing) { +#ifdef _WIN32 + // There does not appear to be comparable support for partial unmapping on Windows. + // Unfortunately, at least one title was found to require this. The workaround is to unmap + // the entire allocation and remap the portions outside of the requested unmapping range. + impl->Unmap(virtual_addr, size, has_backing); + + // TODO: Determine if any titles require partial unmapping support for flexible allocations. + ASSERT_MSG(has_backing || (start_in_vma == 0 && end_in_vma == size), + "Partial unmapping of flexible allocations is not supported"); + + if (start_in_vma != 0) { + Map(virtual_addr, start_in_vma, 0, phys_base, is_exec); + } + + if (end_in_vma != size) { + Map(virtual_addr + end_in_vma, size - end_in_vma, 0, phys_base + end_in_vma, is_exec); + } +#else + impl->Unmap(virtual_addr + start_in_vma, end_in_vma - start_in_vma, has_backing); +#endif } void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission perms) { diff --git a/src/core/address_space.h b/src/core/address_space.h index 29f74f56..53041bcc 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -91,7 +91,8 @@ public: void* MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot, uintptr_t fd); /// Unmaps specified virtual memory area. 
- void Unmap(VAddr virtual_addr, size_t size, bool has_backing); + void Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma, + PAddr phys_base, bool is_exec, bool has_backing); void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index dc5ded41..eed5126c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -54,7 +54,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr; // Add the allocated region to the list and commit its pages. - auto& area = CarveDmemArea(free_addr, size); + auto& area = CarveDmemArea(free_addr, size)->second; area.memory_type = memory_type; area.is_free = false; return free_addr; @@ -63,9 +63,8 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, void MemoryManager::Free(PAddr phys_addr, size_t size) { std::scoped_lock lk{mutex}; - const auto dmem_area = FindDmemArea(phys_addr); - ASSERT(dmem_area != dmem_map.end() && dmem_area->second.base == phys_addr && - dmem_area->second.size == size); + auto dmem_area = CarveDmemArea(phys_addr, size); + ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size); // Release any dmem mappings that reference this physical block. std::vector> remove_list; @@ -74,10 +73,11 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { continue; } if (mapping.phys_base <= phys_addr && phys_addr < mapping.phys_base + mapping.size) { - LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", addr, - mapping.size); + auto vma_segment_start_addr = phys_addr - mapping.phys_base + addr; + LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", + vma_segment_start_addr, size); // Unmaping might erase from vma_map. We can't do it here. 
- remove_list.emplace_back(addr, mapping.size); + remove_list.emplace_back(vma_segment_start_addr, size); } } for (const auto& [addr, size] : remove_list) { @@ -104,8 +104,6 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem const auto& vma = FindVMA(mapped_addr)->second; // If the VMA is mapped, unmap the region first. if (vma.IsMapped()) { - ASSERT_MSG(vma.base == mapped_addr && vma.size == size, - "Region must match when reserving a mapped region"); UnmapMemory(mapped_addr, size); } const size_t remaining_size = vma.base + vma.size - mapped_addr; @@ -169,6 +167,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.prot = prot; new_vma.name = name; new_vma.type = type; + new_vma.is_exec = is_exec; if (type == VMAType::Direct) { new_vma.phys_base = phys_addr; @@ -216,10 +215,16 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { std::scoped_lock lk{mutex}; const auto it = FindVMA(virtual_addr); - ASSERT_MSG(it->second.Contains(virtual_addr, size), + const auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(virtual_addr, size), "Existing mapping does not contain requested unmap range"); - const auto type = it->second.type; + const auto vma_base_addr = vma_base.base; + const auto vma_base_size = vma_base.size; + const auto phys_base = vma_base.phys_base; + const bool is_exec = vma_base.is_exec; + const auto start_in_vma = virtual_addr - vma_base_addr; + const auto type = vma_base.type; const bool has_backing = type == VMAType::Direct || type == VMAType::File; if (type == VMAType::Direct) { rasterizer->UnmapMemory(virtual_addr, size); @@ -239,7 +244,8 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { MergeAdjacent(vma_map, new_it); // Unmap the memory region. 
- impl.Unmap(virtual_addr, size, has_backing); + impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec, + has_backing); TRACK_FREE(virtual_addr, "VMEM"); } @@ -397,13 +403,12 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size return vma_handle; } -DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) { +MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); const DirectMemoryArea& area = dmem_handle->second; - ASSERT_MSG(area.is_free && area.base <= addr, - "Adding an allocation to already allocated region"); + ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region"); const PAddr start_in_area = addr - area.base; const PAddr end_in_vma = start_in_area + size; @@ -418,7 +423,7 @@ DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) { dmem_handle = Split(dmem_handle, start_in_area); } - return dmem_handle->second; + return dmem_handle; } MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) { diff --git a/src/core/memory.h b/src/core/memory.h index 6d0a977f..d5826967 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -84,6 +84,7 @@ struct VirtualMemoryArea { bool disallow_merge = false; std::string name = ""; uintptr_t fd = 0; + bool is_exec = false; bool Contains(VAddr addr, size_t size) const { return addr >= base && (addr + size) <= (base + this->size); @@ -205,7 +206,7 @@ private: VMAHandle CarveVMA(VAddr virtual_addr, size_t size); - DirectMemoryArea& CarveDmemArea(PAddr addr, size_t size); + DMemHandle CarveDmemArea(PAddr addr, size_t size); VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma); From 5eecd089ab259d2ae6f7d4bca48ebfaa04736089 Mon Sep 17 00:00:00 2001 From: Lizardy <6063922+lzardy@users.noreply.github.com> Date: Tue, 13 Aug 
2024 02:08:03 -0400 Subject: [PATCH 11/23] thread_management.cpp: Various Mandatory Threading Fixes | Resolve #398 (#394) * Handle empty mutex attribute - scePthreadMutexInit did not return default when the mutex attributes were empty, now it does * fix conditional unsafety * Update thread_management.cpp fix deref * accurate heap api - modified HeapAPI to a struct with preset function fields - utilized the full array parameter passed to _sceKernelRtldSetApplicationHeapAPI * fallback to std malloc * clang format * Declare all HeapAPI replacement functions - calloc, realloc, memalign, reallocalign, malloc_stats, malloc_stats_fast, malloc_usable_size - posix_memalign corrected parameters * resolve suggestions - `using` definition replacement for AppHeapAPI - linux uses heap_malloc, windows uses std::malloc --------- Co-authored-by: microsoftv <6063922+microsoftv@users.noreply.github.com> --- .../libraries/kernel/memory_management.cpp | 4 ++-- src/core/libraries/kernel/memory_management.h | 2 +- .../libraries/kernel/thread_management.cpp | 12 ++++++++-- src/core/linker.cpp | 20 ++++++++++++++--- src/core/linker.h | 22 +++++++++++++++---- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index 94762c4a..54c5860f 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -212,9 +212,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* out_size) { return ORBIS_OK; } -void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) { +void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]) { auto* linker = Common::Singleton::Instance(); - linker->SetHeapApiFunc(func); + linker->SetHeapAPI(func); } int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut, diff --git a/src/core/libraries/kernel/memory_management.h 
b/src/core/libraries/kernel/memory_management.h index 6735ead7..378449cc 100644 --- a/src/core/libraries/kernel/memory_management.h +++ b/src/core/libraries/kernel/memory_management.h @@ -98,7 +98,7 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, size_t infoSize); s32 PS4_SYSV_ABI sceKernelAvailableFlexibleMemorySize(size_t* sizeOut); -void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func); +void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]); int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut); diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 85e2d0e6..cdd729da 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -421,13 +421,21 @@ ScePthreadMutex* createMutex(ScePthreadMutex* addr) { return addr; } -int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* attr, +int PS4_SYSV_ABI scePthreadMutexInit(ScePthreadMutex* mutex, const ScePthreadMutexattr* mutex_attr, const char* name) { + const ScePthreadMutexattr* attr; + if (mutex == nullptr) { return SCE_KERNEL_ERROR_EINVAL; } - if (attr == nullptr) { + if (mutex_attr == nullptr) { attr = g_pthread_cxt->getDefaultMutexattr(); + } else { + if (*mutex_attr == nullptr) { + attr = g_pthread_cxt->getDefaultMutexattr(); + } else { + attr = mutex_attr; + } } *mutex = new PthreadMutexInternal{}; diff --git a/src/core/linker.cpp b/src/core/linker.cpp index e4cbe573..d4a15825 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -305,7 +305,8 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { // Module was just loaded by above code. Allocate TLS block for it. 
Module* module = m_modules[module_index - 1].get(); const u32 init_image_size = module->tls.init_image_size; - u8* dest = reinterpret_cast(heap_api_func(module->tls.image_size)); + // TODO: Determine if Windows will crash from this + u8* dest = reinterpret_cast(heap_api->heap_malloc(module->tls.image_size)); const u8* src = reinterpret_cast(module->tls.image_virtual_addr); std::memcpy(dest, src, init_image_size); std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size); @@ -335,10 +336,23 @@ void Linker::InitTlsForThread(bool is_primary) { &addr_out, tls_aligned, 3, 0, "SceKernelPrimaryTcbTls"); ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread"); } else { - if (heap_api_func) { - addr_out = heap_api_func(total_tls_size); + if (heap_api) { +#ifndef WIN32 + addr_out = heap_api->heap_malloc(total_tls_size); } else { addr_out = std::malloc(total_tls_size); +#else + // TODO: Windows tls malloc replacement, refer to rtld_tls_block_malloc + LOG_ERROR(Core_Linker, "TLS user malloc called, using std::malloc"); + addr_out = std::malloc(total_tls_size); + if (!addr_out) { + auto pth_id = pthread_self(); + auto handle = pthread_gethandle(pth_id); + ASSERT_MSG(addr_out, + "Cannot allocate TLS block defined for handle=%x, index=%d size=%d", + handle, pth_id, total_tls_size); + } +#endif } } diff --git a/src/core/linker.h b/src/core/linker.h index aee8c8fd..ed1fe400 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -46,7 +46,21 @@ struct EntryParams { const char* argv[3]; }; -using HeapApiFunc = PS4_SYSV_ABI void* (*)(size_t); +struct HeapAPI { + PS4_SYSV_ABI void* (*heap_malloc)(size_t); + PS4_SYSV_ABI void (*heap_free)(void*); + PS4_SYSV_ABI void* (*heap_calloc)(size_t, size_t); + PS4_SYSV_ABI void* (*heap_realloc)(void*, size_t); + PS4_SYSV_ABI void* (*heap_memalign)(size_t, size_t); + PS4_SYSV_ABI int (*heap_posix_memalign)(void**, size_t, size_t); + // NOTE: Fields below may be inaccurate + PS4_SYSV_ABI int 
(*heap_reallocalign)(void); + PS4_SYSV_ABI void (*heap_malloc_stats)(void); + PS4_SYSV_ABI int (*heap_malloc_stats_fast)(void); + PS4_SYSV_ABI size_t (*heap_malloc_usable_size)(void*); +}; + +using AppHeapAPI = HeapAPI*; class Linker { public: @@ -75,8 +89,8 @@ public: } } - void SetHeapApiFunc(void* func) { - heap_api_func = *reinterpret_cast(func); + void SetHeapAPI(void* func[]) { + heap_api = reinterpret_cast(func); } void AdvanceGenerationCounter() noexcept { @@ -104,7 +118,7 @@ private: size_t static_tls_size{}; u32 max_tls_index{}; u32 num_static_modules{}; - HeapApiFunc heap_api_func{}; + AppHeapAPI heap_api{}; std::vector> m_modules; Loader::SymbolsResolver m_hle_symbols{}; }; From dfcfd62d4f76d392b1754ce13e8a11154749a78d Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Tue, 13 Aug 2024 03:12:38 -0300 Subject: [PATCH 12/23] spirv: fix image sample lod/clamp/offset translation (#402) * spirv: fix image sample lod/clamp translation * spirv: fix image sample offsets * fix ImageSample opcodes & offset emission --- .../backend/spirv/emit_spirv_image.cpp | 45 ++++++++++++---- .../backend/spirv/emit_spirv_instructions.h | 6 +-- .../frontend/translate/vector_memory.cpp | 13 +++-- src/shader_recompiler/ir/ir_emitter.cpp | 44 +++++----------- src/shader_recompiler/ir/ir_emitter.h | 22 ++++---- src/shader_recompiler/ir/opcodes.inc | 8 +-- .../ir/passes/resource_tracking_pass.cpp | 51 ++++++++++++++----- 7 files changed, 112 insertions(+), 77 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index e2d2c1ae..72a60327 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -21,14 +21,19 @@ struct ImageOperands { boost::container::static_vector operands; }; -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, +Id EmitImageSampleImplicitLod(EmitContext& 
ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, Id offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + if (Sirit::ValidId(bias)) { + operands.Add(spv::ImageOperandsMask::Bias, bias); + } + if (Sirit::ValidId(offset)) { + operands.Add(spv::ImageOperandsMask::Offset, offset); + } return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands); } @@ -39,27 +44,49 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, - spv::ImageOperandsMask::Lod, lod); + ImageOperands operands; + if (Sirit::ValidId(lod)) { + operands.Add(spv::ImageOperandsMask::Lod, lod); + } + if (Sirit::ValidId(offset)) { + operands.Add(spv::ImageOperandsMask::Offset, offset); + } + return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, + operands.operands); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, const IR::Value& offset) { + Id bias, Id offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref); + ImageOperands operands; + 
if (Sirit::ValidId(bias)) { + operands.Add(spv::ImageOperandsMask::Bias, bias); + } + if (Sirit::ValidId(offset)) { + operands.Add(spv::ImageOperandsMask::Offset, offset); + } + return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, + operands.operands); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, Id offset) { + Id lod, Id offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); - return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, - spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + ImageOperands operands; + if (Sirit::ValidId(lod)) { + operands.Add(spv::ImageOperandsMask::Lod, lod); + } + if (Sirit::ValidId(offset)) { + operands.Add(spv::ImageOperandsMask::Offset, offset); + } + return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, + operands.operands); } Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 0b2020f1..85c6eaac 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -357,14 +357,14 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitConvertU16U32(EmitContext& ctx, Id value); Id EmitConvertU32U16(EmitContext& ctx, Id value); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, Id offset); Id 
EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, const IR::Value& offset); + Id bias, Id offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias_lc, Id offset); + Id lod, Id offset); Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2, Id dref); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 3c6dfbda..bb202e42 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -135,8 +135,8 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction // Set Architecture - const IR::Value offset = - flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{}; + const IR::U32 offset = + flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::U32{}; const IR::F32 bias = flags.test(MimgModifier::LodBias) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; const IR::F32 dref = @@ -168,18 +168,17 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { - const IR::F32 lod = flags.test(MimgModifier::Level0) ? 
ir.Imm32(0.f) : IR::F32{}; if (!flags.test(MimgModifier::Pcf)) { if (explicit_lod) { - return ir.ImageSampleExplicitLod(handle, body, lod, offset, info); + return ir.ImageSampleExplicitLod(handle, body, offset, info); } else { - return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info); + return ir.ImageSampleImplicitLod(handle, body, bias, offset, info); } } if (explicit_lod) { - return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info); + return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info); } - return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info); + return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info); }(); for (u32 i = 0; i < 4; i++) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 03404aca..08b7fbbc 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -16,18 +16,6 @@ namespace { UNREACHABLE_MSG("Invalid type = {}, functionName = {}, line = {}", u32(type), functionName, lineNumber); } - -Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { - if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { - return ir.CompositeConstruct(bias_lod, lod_clamp); - } else if (!bias_lod.IsEmpty()) { - return bias_lod; - } else if (!lod_clamp.IsEmpty()) { - return lod_clamp; - } else { - return Value{}; - } -} } // Anonymous namespace U1 IREmitter::Imm1(bool value) const { @@ -1386,30 +1374,26 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value); } -Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, - const Value& offset, const F32& lod_clamp, +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias, + const U32& offset, TextureInstInfo info) { + return 
Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset, TextureInstInfo info) { - const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; - return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias_lc, offset); + return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset); } -Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, - const Value& offset, TextureInstInfo info) { - return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset); -} - -F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& bias, const Value& offset, - const F32& lod_clamp, TextureInstInfo info) { - const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; - return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias_lc, +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref, + const F32& bias, const U32& offset, + TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias, offset); } -F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& lod, const Value& offset, - TextureInstInfo info) { - return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod, +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref, + const U32& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{}, offset); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index a65e4613..fda20639 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ 
b/src/shader_recompiler/ir/ir_emitter.h @@ -241,19 +241,21 @@ public: [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); - [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, - const F32& bias, const Value& offset, - const F32& lod_clamp, TextureInstInfo info); - [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, - const F32& lod, const Value& offset, + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body, + const F32& bias, const U32& offset, TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body, + const U32& offset, TextureInstInfo info); + + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref, const F32& bias, - const Value& offset, const F32& lod_clamp, + const U32& offset, TextureInstInfo info); + + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body, + const F32& dref, const U32& offset, TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, - const F32& dref, const F32& lod, - const Value& offset, TextureInstInfo info); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, const IR::U1& skip_mips); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index aa2fd3f8..46918bc3 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -298,10 +298,10 @@ OPCODE(ConvertU16U32, U16, U32, OPCODE(ConvertU32U16, U32, U16, ) // Image operations -OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) 
-OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, U32, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, U32, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, U32, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, U32, ) OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 6c96faa3..bacbac72 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -567,25 +567,47 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip if (inst_info.has_offset) { // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. - const bool is_gather = inst.GetOpcode() == IR::Opcode::ImageGather || - inst.GetOpcode() == IR::Opcode::ImageGatherDref; - const u32 arg_pos = is_gather ? 2 : (inst_info.is_depth ? 4 : 3); + const u32 arg_pos = [&]() -> u32 { + switch (inst.GetOpcode()) { + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: + return 2; + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + return 3; + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + return 4; + default: + break; + } + return inst_info.is_depth ? 
4 : 3; + }(); const IR::Value arg = inst.Arg(arg_pos); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); - const auto sign_ext = [&](u32 value) { return ir.Imm32(s32(value << 24) >> 24); }; - union { - u32 raw; - BitField<0, 6, u32> x; - BitField<8, 6, u32> y; - BitField<16, 6, u32> z; - } offset{arg.U32()}; - const IR::Value value = ir.CompositeConstruct(sign_ext(offset.x), sign_ext(offset.y)); + const auto f = [&](IR::Value value, u32 offset) -> auto { + return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true); + }; + + const auto x = f(arg, 0); + const auto y = f(arg, 8); + const auto z = f(arg, 16); + const IR::Value value = ir.CompositeConstruct(x, y, z); inst.SetArg(arg_pos, value); } if (inst_info.has_lod_clamp) { - // Final argument contains lod_clamp - const u32 arg_pos = inst_info.is_depth ? 5 : 4; + const u32 arg_pos = [&]() -> u32 { + switch (inst.GetOpcode()) { + case IR::Opcode::ImageSampleImplicitLod: + return 2; + case IR::Opcode::ImageSampleDrefImplicitLod: + return 3; + default: + break; + } + return inst_info.is_depth ? 5 : 4; + }(); inst.SetArg(arg_pos, arg); } if (inst_info.explicit_lod) { @@ -593,7 +615,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod || inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod); const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3; - inst.SetArg(pos, arg); + const IR::Value value = inst_info.force_level0 ? 
ir.Imm32(0.f) : arg; + inst.SetArg(pos, value); } } From 1fb0da9b897b9a2b9d31a42898b660ef7d01a848 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 13 Aug 2024 09:21:48 +0300 Subject: [PATCH 13/23] video_core: Crucial buffer cache fixes + proper GPU clears (#414) * translator: Use templates for stronger type guarantees * spirv: Define buffer offsets upfront * Saves a lot of shader instructions * buffer_cache: Use dynamic vertex input when available * Fixes issues when games like dark souls rebind vertex buffers with different stride * externals: Update boost * spirv: Use runtime array for ssbos * ssbos can be large and typically their size will vary, especially in generic copy/clear cs shaders * fs: Lock when doing case insensitive search * Dark Souls does fs lookups from different threads * texture_cache: More precise invalidation from compute * Fixes unrelated render targets being cleared * texture_cache: Use hashes for protect gpu modified images from reupload * translator: Treat V_CNDMASK as float * Sometimes it can have input modifiers. Worst this will cause is some extra calls to uintBitsToFloat and opposite. 
But most often this is used as float anyway * translator: Small optimization for V_SAD_U32 * Fix review * clang format --- externals/ext-boost | 2 +- src/core/file_sys/fs.cpp | 1 + .../libraries/kernel/threads/semaphore.cpp | 3 - .../spirv/emit_spirv_context_get_set.cpp | 15 +- .../backend/spirv/emit_spirv_special.cpp | 4 +- .../backend/spirv/spirv_emit_context.cpp | 26 +- .../backend/spirv/spirv_emit_context.h | 5 +- .../frontend/translate/translate.cpp | 329 ++++++++---------- .../frontend/translate/translate.h | 8 +- .../frontend/translate/vector_alu.cpp | 147 ++++---- src/video_core/buffer_cache/buffer_cache.cpp | 41 +++ src/video_core/buffer_cache/buffer_cache.h | 6 + .../renderer_vulkan/renderer_vulkan.h | 4 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 5 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 + .../renderer_vulkan/vk_instance.cpp | 9 +- src/video_core/renderer_vulkan/vk_instance.h | 6 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 + src/video_core/texture_cache/image.cpp | 1 + src/video_core/texture_cache/image.h | 1 + .../texture_cache/texture_cache.cpp | 82 +++-- src/video_core/texture_cache/texture_cache.h | 15 +- src/video_core/texture_cache/tile_manager.cpp | 1 - 23 files changed, 372 insertions(+), 346 deletions(-) diff --git a/externals/ext-boost b/externals/ext-boost index 147b2de7..a04136ad 160000 --- a/externals/ext-boost +++ b/externals/ext-boost @@ -1 +1 @@ -Subproject commit 147b2de7734f5dc3b9aeb1f4135ae15fcd44b9d7 +Subproject commit a04136add1e469f46d8ae8d3e8307779240a5c53 diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 2bcff191..a6d5c3ea 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -54,6 +54,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory) // If the path does not exist attempt to verify this. // Retrieve parent path until we find one that exists. 
+ std::scoped_lock lk{m_mutex}; path_parts.clear(); auto current_path = host_path; while (!std::filesystem::exists(current_path)) { diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index 5441c641..5304dc57 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -9,7 +9,6 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/libraries/error_codes.h" -#include "core/libraries/kernel/thread_management.h" #include "core/libraries/libs.h" namespace Libraries::Kernel { @@ -82,7 +81,6 @@ public: public: struct WaitingThread : public ListBaseHook { - std::string name; std::condition_variable cv; u32 priority; s32 need_count; @@ -90,7 +88,6 @@ public: bool was_cancled{}; explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} { - name = scePthreadSelf()->name; if (is_fifo) { return; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index bd34ed3d..5eae058a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -128,11 +128,7 @@ Id EmitReadConst(EmitContext& ctx) { Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } - const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))}; - index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords); + index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; return ctx.OpLoad(buffer.data_types->Get(1), ptr); } @@ -229,9 +225,6 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id 
address) { template static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); if constexpr (N == 1) { @@ -404,9 +397,6 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com template static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); if constexpr (N == 1) { return GetBufferFormatValue(ctx, handle, address, 0); @@ -438,9 +428,6 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad template static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { auto& buffer = ctx.buffers[handle]; - if (!Sirit::ValidId(buffer.offset)) { - buffer.offset = ctx.GetBufferOffset(buffer.global_binding); - } address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); if constexpr (N == 1) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 891e41df..3ed89692 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -6,7 +6,9 @@ namespace Shader::Backend::SPIRV { -void EmitPrologue(EmitContext& ctx) {} +void EmitPrologue(EmitContext& ctx) { + ctx.DefineBufferOffsets(); +} void EmitEpilogue(EmitContext& ctx) {} diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp 
b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 61b55437..55754d45 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -165,14 +165,18 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f throw InvalidArgument("Invalid attribute type {}", fmt); } -Id EmitContext::GetBufferOffset(u32 binding) { - const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); - const u32 comp = (binding & 0xf) >> 2; - const u32 offset = (binding & 0x3) << 3; - const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), - push_data_block, ConstU32(half), ConstU32(comp))}; - const Id value{OpLoad(U32[1], ptr)}; - return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); +void EmitContext::DefineBufferOffsets() { + for (auto& buffer : buffers) { + const u32 binding = buffer.binding; + const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); + const u32 comp = (binding & 0xf) >> 2; + const u32 offset = (binding & 0x3) << 3; + const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), + push_data_block, ConstU32(half), ConstU32(comp))}; + const Id value{OpLoad(U32[1], ptr)}; + buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); + buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); + } } Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { @@ -327,7 +331,9 @@ void EmitContext::DefineBuffers() { for (u32 i = 0; const auto& buffer : info.buffers) { const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; - const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))}; + const Id record_array_type{buffer.is_storage + ? 
TypeRuntimeArray(data_type) + : TypeArray(data_type, ConstU32(buffer.length))}; const Id struct_type{TypeStruct(record_array_type)}; if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { Decorate(record_array_type, spv::Decoration::ArrayStride, 4); @@ -354,7 +360,7 @@ void EmitContext::DefineBuffers() { buffers.push_back({ .id = id, - .global_binding = binding++, + .binding = binding++, .data_types = data_types, .pointer_type = pointer_type, .buffer = buffer.GetVsharp(info), diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0d090eb3..81237a9a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -40,7 +40,7 @@ public: ~EmitContext(); Id Def(const IR::Value& value); - Id GetBufferOffset(u32 binding); + void DefineBufferOffsets(); [[nodiscard]] Id DefineInput(Id type, u32 location) { const Id input_id{DefineVar(type, spv::StorageClass::Input)}; @@ -203,7 +203,8 @@ public: struct BufferDefinition { Id id; Id offset; - u32 global_binding; + Id offset_dwords; + u32 binding; const VectorIds* data_types; Id pointer_type; AmdGpu::Buffer buffer; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index b295c1be..8ffde7fb 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -73,101 +73,190 @@ void Translator::EmitPrologue() { } } -template <> -IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { - IR::U32F32 value{}; +template +T Translator::GetSrc(const InstOperand& operand) { + constexpr bool is_float = std::is_same_v; - const bool is_float = operand.type == ScalarType::Float32 || force_flt; + const auto get_imm = [&](auto value) -> T { + if constexpr (is_float) { + return 
ir.Imm32(std::bit_cast(value)); + } else { + return ir.Imm32(std::bit_cast(value)); + } + }; + + T value{}; switch (operand.field) { case OperandField::ScalarGPR: - if (is_float) { - value = ir.GetScalarReg(IR::ScalarReg(operand.code)); - } else { - value = ir.GetScalarReg(IR::ScalarReg(operand.code)); - } + value = ir.GetScalarReg(IR::ScalarReg(operand.code)); break; case OperandField::VectorGPR: - if (is_float) { - value = ir.GetVectorReg(IR::VectorReg(operand.code)); - } else { - value = ir.GetVectorReg(IR::VectorReg(operand.code)); - } + value = ir.GetVectorReg(IR::VectorReg(operand.code)); break; case OperandField::ConstZero: - if (is_float) { - value = ir.Imm32(0.f); - } else { - value = ir.Imm32(0U); - } + value = get_imm(0U); break; case OperandField::SignedConstIntPos: - ASSERT(!force_flt); - value = ir.Imm32(operand.code - SignedConstIntPosMin + 1); + value = get_imm(operand.code - SignedConstIntPosMin + 1); break; case OperandField::SignedConstIntNeg: - ASSERT(!force_flt); - value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1); + value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1); break; case OperandField::LiteralConst: - if (is_float) { - value = ir.Imm32(std::bit_cast(operand.code)); - } else { - value = ir.Imm32(operand.code); - } + value = get_imm(operand.code); break; case OperandField::ConstFloatPos_1_0: - if (is_float) { - value = ir.Imm32(1.f); - } else { - value = ir.Imm32(std::bit_cast(1.f)); - } + value = get_imm(1.f); break; case OperandField::ConstFloatPos_0_5: - value = ir.Imm32(0.5f); + value = get_imm(0.5f); break; case OperandField::ConstFloatPos_2_0: - value = ir.Imm32(2.0f); + value = get_imm(2.0f); break; case OperandField::ConstFloatPos_4_0: - value = ir.Imm32(4.0f); + value = get_imm(4.0f); break; case OperandField::ConstFloatNeg_0_5: - value = ir.Imm32(-0.5f); + value = get_imm(-0.5f); break; case OperandField::ConstFloatNeg_1_0: - if (is_float) { - value = ir.Imm32(-1.0f); - } else { - value = 
ir.Imm32(std::bit_cast(-1.0f)); - } + value = get_imm(-1.0f); break; case OperandField::ConstFloatNeg_2_0: - value = ir.Imm32(-2.0f); + value = get_imm(-2.0f); break; case OperandField::ConstFloatNeg_4_0: - value = ir.Imm32(-4.0f); + value = get_imm(-4.0f); break; case OperandField::VccLo: - if (force_flt) { + if constexpr (is_float) { value = ir.BitCast(ir.GetVccLo()); } else { value = ir.GetVccLo(); } break; case OperandField::VccHi: - if (force_flt) { + if constexpr (is_float) { value = ir.BitCast(ir.GetVccHi()); } else { value = ir.GetVccHi(); } break; case OperandField::M0: - return m0_value; + if constexpr (is_float) { + UNREACHABLE(); + } else { + return m0_value; + } default: UNREACHABLE(); } - if (is_float) { + if constexpr (is_float) { + if (operand.input_modifier.abs) { + value = ir.FPAbs(value); + } + if (operand.input_modifier.neg) { + value = ir.FPNeg(value); + } + } else { + if (operand.input_modifier.abs) { + UNREACHABLE(); + } + if (operand.input_modifier.neg) { + UNREACHABLE(); + } + } + return value; +} + +template IR::U32 Translator::GetSrc(const InstOperand&); +template IR::F32 Translator::GetSrc(const InstOperand&); + +template +T Translator::GetSrc64(const InstOperand& operand) { + constexpr bool is_float = std::is_same_v; + + const auto get_imm = [&](auto value) -> T { + if constexpr (is_float) { + return ir.Imm64(std::bit_cast(value)); + } else { + return ir.Imm64(std::bit_cast(value)); + } + }; + + T value{}; + switch (operand.field) { + case OperandField::ScalarGPR: { + const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); + const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); + } + break; + } + case OperandField::VectorGPR: { + const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); + const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); + if constexpr 
(is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); + } + break; + } + case OperandField::ConstZero: + value = get_imm(0ULL); + break; + case OperandField::SignedConstIntPos: + value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1); + break; + case OperandField::SignedConstIntNeg: + value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1); + break; + case OperandField::LiteralConst: + value = get_imm(u64(operand.code)); + break; + case OperandField::ConstFloatPos_1_0: + value = get_imm(1.0); + break; + case OperandField::ConstFloatPos_0_5: + value = get_imm(0.5); + break; + case OperandField::ConstFloatPos_2_0: + value = get_imm(2.0); + break; + case OperandField::ConstFloatPos_4_0: + value = get_imm(4.0); + break; + case OperandField::ConstFloatNeg_0_5: + value = get_imm(-0.5); + break; + case OperandField::ConstFloatNeg_1_0: + value = get_imm(-1.0); + break; + case OperandField::ConstFloatNeg_2_0: + value = get_imm(-2.0); + break; + case OperandField::ConstFloatNeg_4_0: + value = get_imm(-4.0); + break; + case OperandField::VccLo: + if constexpr (is_float) { + UNREACHABLE(); + } else { + value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi())); + } + break; + case OperandField::VccHi: + default: + UNREACHABLE(); + } + + if constexpr (is_float) { if (operand.input_modifier.abs) { value = ir.FPAbs(value); } @@ -178,148 +267,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { return value; } -template <> -IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { - return GetSrc(operand, force_flt); -} - -template <> -IR::F32 Translator::GetSrc(const InstOperand& operand, bool) { - return GetSrc(operand, true); -} - -template <> -IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) { - IR::Value value_hi{}; - IR::Value value_lo{}; - - bool immediate = false; - const bool is_float = operand.type == 
ScalarType::Float64 || force_flt; - switch (operand.field) { - case OperandField::ScalarGPR: - if (is_float) { - value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); - value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); - } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) { - value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); - value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); - } else { - UNREACHABLE(); - } - break; - case OperandField::VectorGPR: - if (is_float) { - value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); - value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); - } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) { - value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); - value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); - } else { - UNREACHABLE(); - } - break; - case OperandField::ConstZero: - immediate = true; - if (force_flt) { - value_lo = ir.Imm64(0.0); - } else { - value_lo = ir.Imm64(u64(0U)); - } - break; - case OperandField::SignedConstIntPos: - ASSERT(!force_flt); - immediate = true; - value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1); - break; - case OperandField::SignedConstIntNeg: - ASSERT(!force_flt); - immediate = true; - value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1); - break; - case OperandField::LiteralConst: - immediate = true; - if (force_flt) { - UNREACHABLE(); // There is a literal double? 
- } else { - value_lo = ir.Imm64(u64(operand.code)); - } - break; - case OperandField::ConstFloatPos_1_0: - immediate = true; - if (force_flt) { - value_lo = ir.Imm64(1.0); - } else { - value_lo = ir.Imm64(std::bit_cast(f64(1.0))); - } - break; - case OperandField::ConstFloatPos_0_5: - immediate = true; - value_lo = ir.Imm64(0.5); - break; - case OperandField::ConstFloatPos_2_0: - immediate = true; - value_lo = ir.Imm64(2.0); - break; - case OperandField::ConstFloatPos_4_0: - immediate = true; - value_lo = ir.Imm64(4.0); - break; - case OperandField::ConstFloatNeg_0_5: - immediate = true; - value_lo = ir.Imm64(-0.5); - break; - case OperandField::ConstFloatNeg_1_0: - immediate = true; - value_lo = ir.Imm64(-1.0); - break; - case OperandField::ConstFloatNeg_2_0: - immediate = true; - value_lo = ir.Imm64(-2.0); - break; - case OperandField::ConstFloatNeg_4_0: - immediate = true; - value_lo = ir.Imm64(-4.0); - break; - case OperandField::VccLo: { - value_lo = ir.GetVccLo(); - value_hi = ir.GetVccHi(); - } break; - case OperandField::VccHi: - UNREACHABLE(); - default: - UNREACHABLE(); - } - - IR::Value value; - - if (immediate) { - value = value_lo; - } else if (is_float) { - throw NotImplementedException("required OpPackDouble2x32 implementation"); - } else { - IR::Value packed = ir.CompositeConstruct(value_lo, value_hi); - value = ir.PackUint2x32(packed); - } - - if (is_float) { - if (operand.input_modifier.abs) { - value = ir.FPAbs(IR::F32F64(value)); - } - if (operand.input_modifier.neg) { - value = ir.FPNeg(IR::F32F64(value)); - } - } - return IR::U64F64(value); -} - -template <> -IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) { - return GetSrc64(operand, force_flt); -} -template <> -IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) { - return GetSrc64(operand, true); -} +template IR::U64 Translator::GetSrc64(const InstOperand&); +template IR::F64 Translator::GetSrc64(const InstOperand&); void Translator::SetDst(const 
InstOperand& operand, const IR::U32F32& value) { IR::U32F32 result = value; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index fe4457d2..2e12209d 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -211,10 +211,10 @@ public: void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); private: - template - [[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false); - template - [[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false); + template + [[nodiscard]] T GetSrc(const InstOperand& operand); + template + [[nodiscard]] T GetSrc64(const InstOperand& operand); void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 89428c44..1bbc3c16 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" -#include "shader_recompiler/profile.h" namespace Shader::Gcn { @@ -312,7 +311,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { } void Translator::V_MOV(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0])); + SetDst(inst.dst[0], GetSrc(inst.src[0])); } void Translator::V_SAD(const GcnInst& inst) { @@ -321,14 +320,14 @@ void Translator::V_SAD(const GcnInst& inst) { } void Translator::V_MAC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true), - GetSrc(inst.dst[0], true))); + SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), + GetSrc(inst.dst[0]))); } void Translator::V_CVT_PKRTZ_F16_F32(const 
GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::Value vec_f32 = - ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)); + ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } @@ -339,13 +338,13 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) { } void Translator::V_CVT_F16_F32(const GcnInst& inst) { - const IR::F32 src0 = GetSrc(inst.src[0], true); + const IR::F32 src0 = GetSrc(inst.src[0]); const IR::F16 src0fp16 = ir.FPConvert(16, src0); SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast(src0fp16))); } void Translator::V_MUL_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true))); + SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } void Translator::V_CNDMASK_B32(const GcnInst& inst) { @@ -354,24 +353,8 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR ? ir.GetThreadBitScalarReg(flag_reg) : ir.GetVcc(); - - // We can treat the instruction as integer most of the time, but when a source is - // a floating point constant we will force the other as float for better readability - // The other operand is also higly likely to be float as well. 
- const auto is_float_const = [](OperandField field) { - return field >= OperandField::ConstFloatPos_0_5 && field <= OperandField::ConstFloatNeg_4_0; - }; - const bool has_flt_source = - is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field); - IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source); - IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source); - if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) { - src1 = ir.BitCast(src1); - } - if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) { - src0 = ir.BitCast(src0); - } - const IR::Value result = ir.Select(flag, src1, src0); + const IR::Value result = + ir.Select(flag, GetSrc(inst.src[1]), GetSrc(inst.src[0])); ir.SetVectorReg(dst_reg, IR::U32F32{result}); } @@ -448,21 +431,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) { } void Translator::V_MAD_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); } void Translator::V_FRACT_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; ir.SetVectorReg(dst_reg, ir.Fract(src0)); } void Translator::V_ADD_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPAdd(src0, src1)); } @@ -476,9 +459,9 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { } void Translator::V_MED3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 
src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2); SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); } @@ -492,32 +475,32 @@ void Translator::V_MED3_I32(const GcnInst& inst) { } void Translator::V_FLOOR_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; ir.SetVectorReg(dst_reg, ir.FPFloor(src0)); } void Translator::V_SUB_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPSub(src0, src1)); } void Translator::V_RCP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRecip(src0)); } void Translator::V_FMA_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); } void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; const IR::U1 result = [&] { switch (op) { case ConditionOp::F: @@ -557,8 +540,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { } void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 
src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy)); } @@ -569,40 +552,40 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { } void Translator::V_RSQ_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); } void Translator::V_SIN_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPSin(src0)); } void Translator::V_LOG_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPLog2(src0)); } void Translator::V_EXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPExp2(src0)); } void Translator::V_SQRT_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPSqrt(src0)); } void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy)); } void Translator::V_MIN3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2))); } @@ -614,9 +597,9 @@ void Translator::V_MIN3_I32(const GcnInst& inst) { } void Translator::V_MADMK_F32(const GcnInst& inst) { 
- const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 k{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 k{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPFma(src0, k, src1)); } @@ -625,25 +608,25 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { } void Translator::V_CUBESC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[0], true)); + SetDst(inst.dst[0], GetSrc(inst.src[0])); } void Translator::V_CUBETC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[1], true)); + SetDst(inst.dst[0], GetSrc(inst.src[1])); } void Translator::V_CUBEID_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc(inst.src[2], true)); + SetDst(inst.dst[0], GetSrc(inst.src[2])); } void Translator::V_CVT_U32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToU(32, src0)); } void Translator::V_SUBREV_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.FPSub(src1, src0)); } @@ -727,9 +710,17 @@ void Translator::V_SAD_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src2{GetSrc(inst.src[2])}; - const IR::U32 max{ir.IMax(src0, src1, false)}; - const IR::U32 min{ir.IMin(src0, src1, false)}; - SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2)); + IR::U32 result; + if (src0.IsImmediate() && src0.U32() == 0U) { + result = src1; + } else if (src1.IsImmediate() && src1.U32() == 0U) { + result = src0; + } else { + const IR::U32 max{ir.IMax(src0, src1, false)}; + const IR::U32 min{ir.IMin(src0, src1, false)}; + result = ir.ISub(max, min); + } + SetDst(inst.dst[0], 
ir.IAdd(result, src2)); } void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { @@ -783,7 +774,7 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) { } void Translator::V_RNDNE_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPRoundEven(src0)); } @@ -794,14 +785,14 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) { } void Translator::V_COS_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPCos(src0)); } void Translator::V_MAX3_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; - const IR::F32 src1{GetSrc(inst.src[1], true)}; - const IR::F32 src2{GetSrc(inst.src[2], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src2{GetSrc(inst.src[2])}; SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); } @@ -813,7 +804,7 @@ void Translator::V_MAX3_U32(const GcnInst& inst) { } void Translator::V_CVT_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToS(32, src0)); } @@ -830,12 +821,12 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) { } void Translator::V_TRUNC_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPTrunc(src0)); } void Translator::V_CEIL_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.FPCeil(src0)); } @@ -899,18 +890,18 @@ void Translator::V_BFREV_B32(const GcnInst& inst) { } void Translator::V_LDEXP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], 
ir.FPLdexp(src0, src1)); } void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0))); } void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { - const IR::F32F64 src0{GetSrc(inst.src[0])}; + const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; IR::U1 value; if (src1.IsImmediate()) { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 7ab0d817..2246807a 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -87,6 +87,15 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si } bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { + boost::container::small_vector attributes; + boost::container::small_vector bindings; + SCOPE_EXIT { + if (instance.IsVertexInputDynamicState()) { + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.setVertexInputEXT(bindings, attributes); + } + }; + if (vs_info.vs_inputs.empty()) { return false; } @@ -122,6 +131,21 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { } guest_buffers.emplace_back(buffer); ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); + attributes.push_back({ + .location = input.binding, + .binding = input.binding, + .format = + Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), + .offset = 0, + }); + bindings.push_back({ + .binding = input.binding, + .stride = buffer.GetStride(), + .inputRate = input.instance_step_rate == Shader::Info::VsInput::None + ? 
vk::VertexInputRate::eVertex + : vk::VertexInputRate::eInstance, + .divisor = 1, + }); } std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) { @@ -224,6 +248,19 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b return {&buffer, buffer.Offset(device_addr)}; } +std::pair BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) { + const u64 page = gpu_addr >> CACHING_PAGEBITS; + const BufferId buffer_id = page_table[page]; + if (buffer_id) { + const Buffer& buffer = slot_buffers[buffer_id]; + if (buffer.IsInBounds(gpu_addr, size)) { + return {&buffer, buffer.Offset(gpu_addr)}; + } + } + const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); + return {&staging_buffer, offset}; +} + bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { const VAddr end_addr = addr + size; const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); @@ -248,6 +285,10 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { return memory_tracker.IsRegionCpuModified(addr, size); } +bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { + return memory_tracker.IsRegionGpuModified(addr, size); +} + BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { if (device_addr == 0) { return NULL_BUFFER_ID; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0dee87cf..33ea3f86 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -69,12 +69,18 @@ public: /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written); + /// Obtains a temporary buffer for usage in texture cache. 
+ [[nodiscard]] std::pair ObtainTempBuffer(VAddr gpu_addr, u32 size); + /// Return true when a region is registered on the cache [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + private: template void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 8178c88d..113b380e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -47,7 +47,7 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, false); + const auto image_id = texture_cache.FindImage(info); auto& image = texture_cache.GetImage(image_id); return PrepareFrameInternal(image, is_eop); } @@ -61,7 +61,7 @@ public: const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info, false); + const auto image_id = texture_cache.FindImage(info); return texture_cache.GetImage(image_id); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 21710a76..62b50eeb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -96,7 +96,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, Shader::PushData push_data{}; u32 binding{}; - 
for (u32 i = 0; const auto& buffer : info.buffers) { + for (const auto& buffer : info.buffers) { const auto vsharp = buffer.GetVsharp(info); const VAddr address = vsharp.base_address; // Most of the time when a metadata is updated with a shader it gets cleared. It means we @@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, } const u32 size = vsharp.GetSize(); if (buffer.is_written) { - texture_cache.InvalidateMemory(address, size); + texture_cache.InvalidateMemory(address, size, true); } const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); @@ -137,7 +137,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, : vk::DescriptorType::eUniformBuffer, .pBufferInfo = &buffer_infos.back(), }); - i++; } for (const auto& image_desc : info.images) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5d87a1ca..cf23ade2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -145,6 +145,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT); dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT); } + if (instance.IsVertexInputDynamicState()) { + dynamic_states.push_back(vk::DynamicState::eVertexInputEXT); + } const vk::PipelineDynamicStateCreateInfo dynamic_info = { .dynamicStateCount = static_cast(dynamic_states.size()), diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 5beb57c4..b60b78e1 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -202,6 +202,8 @@ bool Instance::CreateDevice() { add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); 
workgroup_memory_explicit_layout = add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); @@ -319,6 +321,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceSynchronization2Features{ .synchronization2 = true, }, + vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{ + .vertexInputDynamicState = true, + }, }; if (!color_write_en) { @@ -331,8 +336,8 @@ bool Instance::CreateDevice() { } else { device_chain.unlink(); } - if (!has_sync2) { - device_chain.unlink(); + if (!vertex_input_dynamic_state) { + device_chain.unlink(); } try { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 2f2397d6..4cb4741a 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -132,6 +132,11 @@ public: return color_write_en; } + /// Returns true when VK_EXT_vertex_input_dynamic_state is supported. 
+ bool IsVertexInputDynamicState() const { + return vertex_input_dynamic_state; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -257,6 +262,7 @@ private: bool external_memory_host{}; bool workgroup_memory_explicit_layout{}; bool color_write_en{}; + bool vertex_input_dynamic_state{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8d27d252..8a22b925 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -209,6 +209,10 @@ void PipelineCache::RefreshGraphicsKey() { continue; } const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); + if (!bininfo->Valid()) { + key.stage_hashes[i] = 0; + continue; + } key.stage_hashes[i] = bininfo->shader_hash; } } diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f1148760..bae4b89d 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -117,6 +117,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { + mip_hashes.resize(info.resources.levels); ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. 
In normal case // the texture cache should re-create the resource with the usage requested diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index b18f1002..5a888346 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -111,6 +111,7 @@ struct Image { vk::Flags pl_stage = vk::PipelineStageFlagBits::eAllCommands; vk::Flags access_mask = vk::AccessFlagBits::eNone; vk::ImageLayout layout = vk::ImageLayout::eUndefined; + boost::container::small_vector mip_hashes; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 53596f8e..6b14faac 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -3,6 +3,7 @@ #include #include "common/assert.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -11,13 +12,11 @@ namespace VideoCore { -static constexpr u64 StreamBufferSize = 512_MB; static constexpr u64 PageShift = 12; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, - staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize}, tile_manager{instance, scheduler} { ImageInfo info; info.pixel_format = vk::Format::eR8G8B8A8Unorm; @@ -31,9 +30,12 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& TextureCache::~TextureCache() = default; -void TextureCache::InvalidateMemory(VAddr address, size_t size) { +void TextureCache::InvalidateMemory(VAddr address, size_t size, bool from_compute) { std::unique_lock lock{mutex}; ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { 
+ if (from_compute && !image.Overlaps(address, size)) { + return; + } // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. @@ -57,7 +59,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { } } -ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { +ImageId TextureCache::FindImage(const ImageInfo& info) { if (info.guest_address == 0) [[unlikely]] { return NULL_IMAGE_VIEW_ID; } @@ -87,12 +89,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) { image_id = image_ids[image_ids.size() > 1 ? 1 : 0]; } - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) { - RefreshImage(image); - TrackImage(image, image_id); - } - return image_id; } @@ -119,6 +115,7 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(info); + UpdateImage(image_id); Image& image = slot_images[image_id]; auto& usage = image.info.usage; @@ -165,7 +162,8 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; - image.flags &= ~ImageFlagBits::CpuModified; + image.flags |= ImageFlagBits::GpuModified; + UpdateImage(image_id); image.Transit(vk::ImageLayout::eColorAttachmentOptimal, vk::AccessFlagBits::eColorAttachmentWrite | @@ -198,8 +196,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(image_info, false); + const ImageId image_id = FindImage(image_info); Image& image = slot_images[image_id]; + 
image.flags |= ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::CpuModified; const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal @@ -228,22 +227,6 @@ void TextureCache::RefreshImage(Image& image) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; - scheduler.EndRendering(); - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - vk::Buffer buffer{staging.Handle()}; - u32 offset{0}; - - auto upload_buffer = tile_manager.TryDetile(image); - if (upload_buffer) { - buffer = *upload_buffer; - } else { - // Upload data to the staging buffer. - offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16); - } - const auto& num_layers = image.info.resources.layers; const auto& num_mips = image.info.resources.levels; ASSERT(num_mips == image.info.mips_layout.size()); @@ -254,12 +237,23 @@ void TextureCache::RefreshImage(Image& image) { const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + + // Protect GPU modified resources from accidental reuploads. 
+ if (True(image.flags & ImageFlagBits::GpuModified) && + !buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) { + const u8* addr = std::bit_cast(image.info.guest_address); + const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size); + if (image.mip_hashes[m] == hash) { + continue; + } + image.mip_hashes[m] = hash; + } image_copy.push_back({ - .bufferOffset = offset + mip_ofs * num_layers, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), + .bufferOffset = mip_ofs * num_layers, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), .imageSubresource{ .aspectMask = vk::ImageAspectFlagBits::eColor, .mipLevel = m, @@ -271,6 +265,30 @@ void TextureCache::RefreshImage(Image& image) { }); } + if (image_copy.empty()) { + return; + } + + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); + + const VAddr image_addr = image.info.guest_address; + const size_t image_size = image.info.guest_size_bytes; + vk::Buffer buffer{}; + u32 offset{}; + if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) { + buffer = *upload_buffer; + } else { + const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size); + buffer = vk_buffer->Handle(); + offset = buf_offset; + } + + for (auto& copy : image_copy) { + copy.bufferOffset += offset; + } + cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 17a09898..b3af0ff1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -38,13 +38,13 @@ public: ~TextureCache(); /// Invalidates any image in the logical page range. 
- void InvalidateMemory(VAddr address, size_t size); + void InvalidateMemory(VAddr address, size_t size, bool from_compute = false); /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size); /// Retrieves the image handle of the image with the provided attributes. - [[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true); + [[nodiscard]] ImageId FindImage(const ImageInfo& info); /// Retrieves an image view with the properties of the specified image descriptor. [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, @@ -58,6 +58,16 @@ public: [[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info, const ImageViewInfo& view_info); + /// Updates image contents if it was modified by CPU. + void UpdateImage(ImageId image_id) { + Image& image = slot_images[image_id]; + if (False(image.flags & ImageFlagBits::CpuModified)) { + return; + } + RefreshImage(image); + TrackImage(image, image_id); + } + /// Reuploads image contents. 
void RefreshImage(Image& image); @@ -170,7 +180,6 @@ private: Vulkan::Scheduler& scheduler; BufferCache& buffer_cache; PageManager& tracker; - StreamBuffer staging; TileManager tile_manager; Common::SlotVector slot_images; Common::SlotVector slot_image_views; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 75fa378c..6447fde1 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -5,7 +5,6 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/texture_cache/image_view.h" -#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" #include "video_core/host_shaders/detile_m32x1_comp.h" From d1a033b6afd93d2d36a176c6d0a91c0e85147e3e Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 13 Aug 2024 00:30:47 -0700 Subject: [PATCH 14/23] Fix some Vulkan validation errors on macOS. 
(#420) --- .../renderer_vulkan/vk_instance.cpp | 27 ++++++++++++++++--- .../renderer_vulkan/vk_platform.cpp | 7 +++++ .../renderer_vulkan/vk_swapchain.cpp | 12 ++++++++- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index b60b78e1..66da030f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -164,7 +164,8 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features, vk::PhysicalDeviceVulkan13Features, vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR, - vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT>(); + vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT, + vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); const vk::StructureChain properties_chain = physical_device.getProperties2< vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>(); @@ -198,7 +199,7 @@ bool Instance::CreateDevice() { external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); - add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + const bool depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); workgroup_memory_explicit_layout = add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); @@ -213,7 +214,7 @@ bool Instance::CreateDevice() { // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. 
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); - add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); + const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); @@ -224,6 +225,11 @@ bool Instance::CreateDevice() { : false; } +#ifdef __APPLE__ + // Required by Vulkan spec if supported. + add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME); +#endif + const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { LOG_CRITICAL(Render_Vulkan, "Physical device reported no queues."); @@ -324,12 +330,27 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{ .vertexInputDynamicState = true, }, +#ifdef __APPLE__ + feature_chain.get(), +#endif }; + if (!maintenance4) { + device_chain.unlink(); + } + if (!custom_border_color) { + device_chain.unlink(); + } if (!color_write_en) { device_chain.unlink(); device_chain.unlink(); } + if (!depth_clip_control) { + device_chain.unlink(); + } + if (!workgroup_memory_explicit_layout) { + device_chain.unlink(); + } if (robustness) { device_chain.get().nullDescriptor = feature_chain.get().nullDescriptor; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 33113c58..c73a8139 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -157,6 +157,10 @@ std::vector GetInstanceExtensions(Frontend::WindowSystemType window break; } +#ifdef __APPLE__ + extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); +#endif + if (window_type != Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } @@ -285,6 +289,9 @@ vk::UniqueInstance 
CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT .ppEnabledLayerNames = layers.data(), .enabledExtensionCount = static_cast(extensions.size()), .ppEnabledExtensionNames = extensions.data(), +#ifdef __APPLE__ + .flags = vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR, +#endif }, vk::LayerSettingsCreateInfoEXT{ .settingCount = layer_setings.size(), diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 20c99e30..16d5c237 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -37,6 +37,16 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { instance.GetPresentQueueFamilyIndex(), }; + const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface); + const auto find_mode = [&modes](vk::PresentModeKHR requested) { + const auto it = + std::find_if(modes.begin(), modes.end(), + [&requested](vk::PresentModeKHR mode) { return mode == requested; }); + + return it != modes.end(); + }; + const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox); + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; const u32 queue_family_indices_count = exclusive ? 1u : 2u; const vk::SharingMode sharing_mode = @@ -55,7 +65,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { .pQueueFamilyIndices = queue_family_indices.data(), .preTransform = transform, .compositeAlpha = composite_alpha, - .presentMode = vk::PresentModeKHR::eMailbox, + .presentMode = has_mailbox ? 
vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate, .clipped = true, .oldSwapchain = nullptr, }; From bb159eafb9ff7662432529ffaeefdb745b17234f Mon Sep 17 00:00:00 2001 From: counter185 <33550839+counter185@users.noreply.github.com> Date: Tue, 13 Aug 2024 11:54:08 +0200 Subject: [PATCH 15/23] Basic gamepad support through SDL (#407) * Add basic gamepad support through SDL * lightbar, vibration, code style changes * okay fine * one day clang format will finally pass --- src/core/libraries/pad/pad.cpp | 20 +++++++-- src/input/controller.cpp | 26 ++++++++++++ src/input/controller.h | 7 ++++ src/sdl_window.cpp | 75 ++++++++++++++++++++++++++++++++++ src/sdl_window.h | 4 ++ 5 files changed, 128 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index d3993550..c9e332d2 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -419,8 +419,14 @@ int PS4_SYSV_ABI scePadSetForceIntercepted() { } int PS4_SYSV_ABI scePadSetLightBar(s32 handle, const OrbisPadLightBarParam* pParam) { - LOG_ERROR(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; + if (pParam != nullptr) { + LOG_INFO(Lib_Pad, "scePadSetLightBar called handle = {} rgb = {} {} {}", handle, pParam->r, + pParam->g, pParam->b); + auto* controller = Common::Singleton::Instance(); + controller->SetLightBarRGB(pParam->r, pParam->g, pParam->b); + return ORBIS_OK; + } + return ORBIS_PAD_ERROR_INVALID_ARG; } int PS4_SYSV_ABI scePadSetLightBarBaseBrightness() { @@ -479,8 +485,14 @@ int PS4_SYSV_ABI scePadSetUserColor() { } int PS4_SYSV_ABI scePadSetVibration(s32 handle, const OrbisPadVibrationParam* pParam) { - LOG_DEBUG(Lib_Pad, "(STUBBED) called"); - return ORBIS_OK; + if (pParam != nullptr) { + LOG_INFO(Lib_Pad, "scePadSetVibration called handle = {} data = {} , {}", handle, + pParam->smallMotor, pParam->largeMotor); + auto* controller = Common::Singleton::Instance(); + controller->SetVibration(pParam->smallMotor, 
pParam->largeMotor); + return ORBIS_OK; + } + return ORBIS_PAD_ERROR_INVALID_ARG; } int PS4_SYSV_ABI scePadSetVibrationForce() { diff --git a/src/input/controller.cpp b/src/input/controller.cpp index 247e08ce..4a3db163 100644 --- a/src/input/controller.cpp +++ b/src/input/controller.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "core/libraries/kernel/time_management.h" #include "core/libraries/pad/pad.h" #include "input/controller.h" @@ -117,4 +118,29 @@ void GameController::Axis(int id, Input::Axis axis, int value) { AddState(state); } +void GameController::SetLightBarRGB(u8 r, u8 g, u8 b) { + if (m_sdl_gamepad != nullptr) { + SDL_SetGamepadLED(m_sdl_gamepad, r, g, b); + } +} + +bool GameController::SetVibration(u8 smallMotor, u8 largeMotor) { + if (m_sdl_gamepad != nullptr) { + return SDL_RumbleGamepad(m_sdl_gamepad, (smallMotor / 255.0f) * 0xFFFF, + (largeMotor / 255.0f) * 0xFFFF, -1) == 0; + } + return true; +} + +void GameController::TryOpenSDLController() { + if (m_sdl_gamepad == nullptr || !SDL_GamepadConnected(m_sdl_gamepad)) { + int gamepad_count; + SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count); + m_sdl_gamepad = gamepad_count > 0 ? 
SDL_OpenGamepad(gamepads[0]) : nullptr; + SDL_free(gamepads); + } + + SetLightBarRGB(0, 0, 255); +} + } // namespace Input diff --git a/src/input/controller.h b/src/input/controller.h index a16f7dd0..ef099156 100644 --- a/src/input/controller.h +++ b/src/input/controller.h @@ -6,6 +6,8 @@ #include #include "common/types.h" +struct SDL_Gamepad; + namespace Input { enum class Axis { @@ -43,6 +45,9 @@ public: void CheckButton(int id, u32 button, bool isPressed); void AddState(const State& state); void Axis(int id, Input::Axis axis, int value); + void SetLightBarRGB(u8 r, u8 g, u8 b); + bool SetVibration(u8 smallMotor, u8 largeMotor); + void TryOpenSDLController(); private: struct StateInternal { @@ -57,6 +62,8 @@ private: u32 m_first_state = 0; std::array m_states; std::array m_private; + + SDL_Gamepad* m_sdl_gamepad = nullptr; }; } // namespace Input diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 5e1a4c95..9fd59669 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -43,6 +43,9 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_ SDL_SetWindowFullscreen(window, Config::isFullscreenMode()); + SDL_InitSubSystem(SDL_INIT_GAMEPAD); + controller->TryOpenSDLController(); + #if defined(SDL_PLATFORM_WIN32) window_info.type = WindowSystemType::Windows; window_info.render_surface = SDL_GetPointerProperty(SDL_GetWindowProperties(window), @@ -92,6 +95,11 @@ void WindowSDL::waitEvent() { case SDL_EVENT_KEY_UP: onKeyPress(&event); break; + case SDL_EVENT_GAMEPAD_BUTTON_DOWN: + case SDL_EVENT_GAMEPAD_BUTTON_UP: + case SDL_EVENT_GAMEPAD_AXIS_MOTION: + onGamepadEvent(&event); + break; case SDL_EVENT_QUIT: is_open = false; break; @@ -276,4 +284,71 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { } } +void WindowSDL::onGamepadEvent(const SDL_Event* event) { + using Libraries::Pad::OrbisPadButtonDataOffset; + + u32 button = 0; + Input::Axis axis = Input::Axis::AxisMax; + switch (event->type) { + case 
SDL_EVENT_GAMEPAD_BUTTON_DOWN: + case SDL_EVENT_GAMEPAD_BUTTON_UP: + button = sdlGamepadToOrbisButton(event->gbutton.button); + if (button != 0) { + controller->CheckButton(0, button, event->type == SDL_EVENT_GAMEPAD_BUTTON_DOWN); + } + break; + case SDL_EVENT_GAMEPAD_AXIS_MOTION: + axis = event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTX ? Input::Axis::LeftX + : event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFTY ? Input::Axis::LeftY + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTX ? Input::Axis::RightX + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHTY ? Input::Axis::RightY + : event->gaxis.axis == SDL_GAMEPAD_AXIS_LEFT_TRIGGER ? Input::Axis::TriggerLeft + : event->gaxis.axis == SDL_GAMEPAD_AXIS_RIGHT_TRIGGER ? Input::Axis::TriggerRight + : Input::Axis::AxisMax; + if (axis != Input::Axis::AxisMax) { + controller->Axis(0, axis, Input::GetAxis(-0x8000, 0x8000, event->gaxis.value)); + } + break; + } +} + +int WindowSDL::sdlGamepadToOrbisButton(u8 button) { + using Libraries::Pad::OrbisPadButtonDataOffset; + + switch (button) { + case SDL_GAMEPAD_BUTTON_DPAD_DOWN: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_DOWN; + case SDL_GAMEPAD_BUTTON_DPAD_UP: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_UP; + case SDL_GAMEPAD_BUTTON_DPAD_LEFT: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_LEFT; + case SDL_GAMEPAD_BUTTON_DPAD_RIGHT: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_RIGHT; + case SDL_GAMEPAD_BUTTON_SOUTH: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CROSS; + case SDL_GAMEPAD_BUTTON_NORTH: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TRIANGLE; + case SDL_GAMEPAD_BUTTON_WEST: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_SQUARE; + case SDL_GAMEPAD_BUTTON_EAST: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_CIRCLE; + case SDL_GAMEPAD_BUTTON_START: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_OPTIONS; + case SDL_GAMEPAD_BUTTON_TOUCHPAD: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD; + case SDL_GAMEPAD_BUTTON_BACK: 
+ return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_TOUCH_PAD; + case SDL_GAMEPAD_BUTTON_LEFT_SHOULDER: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L1; + case SDL_GAMEPAD_BUTTON_RIGHT_SHOULDER: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R1; + case SDL_GAMEPAD_BUTTON_LEFT_STICK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_L3; + case SDL_GAMEPAD_BUTTON_RIGHT_STICK: + return OrbisPadButtonDataOffset::ORBIS_PAD_BUTTON_R3; + default: + return 0; + } +} + } // namespace Frontend diff --git a/src/sdl_window.h b/src/sdl_window.h index 02d01128..cf6c3711 100644 --- a/src/sdl_window.h +++ b/src/sdl_window.h @@ -7,6 +7,7 @@ #include "common/types.h" struct SDL_Window; +struct SDL_Gamepad; union SDL_Event; namespace Input { @@ -66,6 +67,9 @@ public: private: void onResize(); void onKeyPress(const SDL_Event* event); + void onGamepadEvent(const SDL_Event* event); + + int sdlGamepadToOrbisButton(u8 button); private: s32 width; From d8b9d82ffaa2e0931b154a646d1573e535bc951f Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:05:10 +0300 Subject: [PATCH 16/23] video_core: Various fixes (#423) * video_core: Various fixes * clang format --- src/core/libraries/kernel/libkernel.cpp | 1 - .../frontend/translate/translate.cpp | 2 +- .../ir/passes/resource_tracking_pass.cpp | 7 ++++ src/video_core/amdgpu/liverpool.cpp | 2 +- src/video_core/amdgpu/liverpool.h | 32 ++++++++++++++++++- .../renderer_vulkan/liverpool_to_vk.cpp | 2 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +++++ 7 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index e2625819..9657ba04 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -360,7 +360,6 @@ int PS4_SYSV_ABI posix_connect() { } int PS4_SYSV_ABI _sigprocmask() { - LOG_DEBUG(Lib_Kernel, "STUBBED"); return ORBIS_OK; } diff --git 
a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 8ffde7fb..d48e4def 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -162,7 +162,7 @@ T Translator::GetSrc(const InstOperand& operand) { } } else { if (operand.input_modifier.abs) { - UNREACHABLE(); + LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction"); } if (operand.input_modifier.neg) { UNREACHABLE(); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index bacbac72..e6d5c48c 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -494,6 +494,13 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip const auto tsharp = TrackSharp(tsharp_handle); const auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); const auto inst_info = inst.Flags(); + if (!image.Valid()) { + LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith( + ir.CompositeConstruct(ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f))); + return; + } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); u32 image_binding = descriptors.Add(ImageResource{ .sgpr_base = tsharp.sgpr_base, diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 517f9d53..a9665a02 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -408,7 +408,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); + // ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); // Optimization: VO label waits are special 
because the emulator // will write to the label when presentation is finished. So if // there are no other submits to yield to we can sleep the thread diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 779e5536..98b4aba5 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -867,6 +867,33 @@ struct Liverpool { } }; + union ShaderStageEnable { + u32 raw; + BitField<0, 2, u32> ls_en; + BitField<2, 1, u32> hs_en; + BitField<3, 2, u32> es_en; + BitField<5, 1, u32> gs_en; + BitField<6, 1, u32> vs_en; + + bool IsStageEnabled(u32 stage) { + switch (stage) { + case 0: + case 1: + return true; + case 2: + return gs_en.Value(); + case 3: + return es_en.Value(); + case 4: + return hs_en.Value(); + case 5: + return ls_en.Value(); + default: + UNREACHABLE(); + } + } + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -945,7 +972,9 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1); u32 vgt_instance_step_rate_0; u32 vgt_instance_step_rate_1; - INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1); + INSERT_PADDING_WORDS(0xA2D5 - 0xA2A9 - 1); + ShaderStageEnable stage_enable; + INSERT_PADDING_WORDS(9); PolygonOffset poly_offset; INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); AaConfig aa_config; @@ -1140,6 +1169,7 @@ static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); +static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 01526265..04e830c0 100644 --- 
a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -81,6 +81,8 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleListWithAdjacency; case Liverpool::PrimitiveType::AdjTriangleStrip: return vk::PrimitiveTopology::eTriangleStripWithAdjacency; + case Liverpool::PrimitiveType::PatchPrimitive: + return vk::PrimitiveTopology::ePatchList; case Liverpool::PrimitiveType::QuadList: // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8a22b925..38d1f51b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -115,6 +115,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, } const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { + // Tessellation is unsupported so skip the draw to avoid locking up the driver. 
+ if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) { + return nullptr; + } RefreshGraphicsKey(); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { @@ -203,6 +207,10 @@ void PipelineCache::RefreshGraphicsKey() { } for (u32 i = 0; i < MaxShaderStages; i++) { + if (!regs.stage_enable.IsStageEnabled(i)) { + key.stage_hashes[i] = 0; + continue; + } auto* pgm = regs.ProgramForStage(i); if (!pgm || !pgm->Address()) { key.stage_hashes[i] = 0; From ad3b6c793c379b0590a6ffed532c6be12b8bb099 Mon Sep 17 00:00:00 2001 From: Samuel Fontes <43213783+SamuelFontes@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:21:06 -0300 Subject: [PATCH 17/23] qt-gui: Added GPU device selection functionality (#399) * qt-gui: Added GPU device selection functionality * Getting list of GPU only when application starts * Fixed formatting * Fixed formatting * Fixed formatting * Added warning when GPU doesn't support API version. * Changed Unsupported Vulkan Version warning * Removed unused size checking on GetPhysicalDevices The method is only being called once so this doesn't make sense. It was some left over of me trying to get this done some other way. 
* Fix formatting * Fix formatting * SettingsDialog: Passing physical devices as span * Fixed formatting --- src/common/config.cpp | 5 +++++ src/common/config.h | 1 + src/qt_gui/main_window.cpp | 17 ++++++++++++++++- src/qt_gui/main_window.h | 3 +++ src/qt_gui/settings_dialog.cpp | 15 +++++++++++++-- src/qt_gui/settings_dialog.h | 3 ++- 6 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 3cf9af15..a65a5b59 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -124,6 +124,10 @@ bool vkValidationGpuEnabled() { return vkValidationGpu; } +void setGpuId(s32 selectedGpuId) { + gpuId = selectedGpuId; +} + void setScreenWidth(u32 width) { screenWidth = width; } @@ -451,6 +455,7 @@ void setDefaultValues() { vkValidation = false; rdocEnable = false; m_language = 1; + gpuId = -1; } } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index 37ace79c..97055028 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -36,6 +36,7 @@ void setNullGpu(bool enable); void setDumpShaders(bool enable); void setDumpPM4(bool enable); void setVblankDiv(u32 value); +void setGpuId(s32 selectedGpuId); void setScreenWidth(u32 width); void setScreenHeight(u32 height); void setFullscreenMode(bool enable); diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 55bd5640..aec2e7a5 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -16,6 +16,7 @@ #include "game_install_dialog.h" #include "main_window.h" #include "settings_dialog.h" +#include "video_core/renderer_vulkan/vk_instance.h" MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWindow) { ui->setupUi(this); @@ -39,6 +40,7 @@ bool MainWindow::Init() { CreateConnects(); SetLastUsedTheme(); SetLastIconSizeBullet(); + GetPhysicalDevices(); // show ui setMinimumSize(350, minimumSizeHint().height()); setWindowTitle(QString::fromStdString("shadPS4 v" + 
std::string(Common::VERSION))); @@ -158,6 +160,19 @@ void MainWindow::LoadGameLists() { } } +void MainWindow::GetPhysicalDevices() { + Vulkan::Instance instance(false, false); + auto physical_devices = instance.GetPhysicalDevices(); + for (const vk::PhysicalDevice physical_device : physical_devices) { + auto prop = physical_device.getProperties(); + QString name = QString::fromUtf8(prop.deviceName, -1); + if (prop.apiVersion < Vulkan::TargetVulkanApiVersion) { + name += " * Unsupported Vulkan Version"; + } + m_physical_devices.push_back(name); + } +} + void MainWindow::CreateConnects() { connect(this, &MainWindow::WindowResized, this, &MainWindow::HandleResize); connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable); @@ -187,7 +202,7 @@ void MainWindow::CreateConnects() { &MainWindow::StartGame); connect(ui->settingsButton, &QPushButton::clicked, this, [this]() { - auto settingsDialog = new SettingsDialog(this); + auto settingsDialog = new SettingsDialog(m_physical_devices, this); settingsDialog->exec(); }); diff --git a/src/qt_gui/main_window.h b/src/qt_gui/main_window.h index 39a5d049..35fd0bf6 100644 --- a/src/qt_gui/main_window.h +++ b/src/qt_gui/main_window.h @@ -54,6 +54,7 @@ private: void CreateActions(); void CreateRecentGameActions(); void CreateDockWindows(); + void GetPhysicalDevices(); void LoadGameLists(); void CreateConnects(); void SetLastUsedTheme(); @@ -79,6 +80,8 @@ private: QScopedPointer m_elf_viewer; // Status Bar. 
QScopedPointer statusBar; + // Available GPU devices + std::vector m_physical_devices; PSF psf; diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 722abe7e..bde0eada 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -4,13 +4,20 @@ #include "settings_dialog.h" #include "ui_settings_dialog.h" -SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::SettingsDialog) { +SettingsDialog::SettingsDialog(std::span physical_devices, QWidget* parent) + : QDialog(parent), ui(new Ui::SettingsDialog) { ui->setupUi(this); ui->tabWidgetSettings->setUsesScrollButtons(false); const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); ui->buttonBox->button(QDialogButtonBox::StandardButton::Close)->setFocus(); + // Add list of available GPUs + ui->graphicsAdapterBox->addItem("Auto Select"); // -1, auto selection + for (const auto& device : physical_devices) { + ui->graphicsAdapterBox->addItem(device); + } + LoadValuesFromConfig(); connect(ui->buttonBox, &QDialogButtonBox::rejected, this, &QWidget::close); @@ -40,7 +47,10 @@ SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::Se // GPU TAB { - // TODO: Implement graphics device changing + // First options is auto selection -1, so gpuId on the GUI will always have to subtract 1 + // when setting and add 1 when getting to select the correct gpu in Qt + connect(ui->graphicsAdapterBox, &QComboBox::currentIndexChanged, this, + [](int index) { Config::setGpuId(index - 1); }); connect(ui->widthSpinBox, &QSpinBox::valueChanged, this, [](int val) { Config::setScreenWidth(val); }); @@ -98,6 +108,7 @@ SettingsDialog::SettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::Se void SettingsDialog::LoadValuesFromConfig() { ui->consoleLanguageComboBox->setCurrentIndex(Config::GetLanguage()); + ui->graphicsAdapterBox->setCurrentIndex(Config::getGpuId() + 1); ui->widthSpinBox->setValue(Config::getScreenWidth()); 
ui->heightSpinBox->setValue(Config::getScreenHeight()); ui->vblankSpinBox->setValue(Config::vblankDiv()); diff --git a/src/qt_gui/settings_dialog.h b/src/qt_gui/settings_dialog.h index 2bffa795..7d870109 100644 --- a/src/qt_gui/settings_dialog.h +++ b/src/qt_gui/settings_dialog.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -16,7 +17,7 @@ class SettingsDialog; class SettingsDialog : public QDialog { Q_OBJECT public: - explicit SettingsDialog(QWidget* parent = nullptr); + explicit SettingsDialog(std::span physical_devices, QWidget* parent = nullptr); ~SettingsDialog(); int exec() override; From 27cb218584d22231766079054a0157686b58a90c Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Wed, 14 Aug 2024 11:36:11 +0200 Subject: [PATCH 18/23] video_core: CPU flip relay (#415) * video_core: cpu flip is propagated via gpu thread now * tentative fix for cpu flips racing * libraries: videoout: better flip status handling --- src/core/libraries/videoout/driver.cpp | 66 +++++++++++++------ src/core/libraries/videoout/driver.h | 3 +- src/core/libraries/videoout/video_out.cpp | 5 +- src/video_core/amdgpu/liverpool.cpp | 22 ++++++- src/video_core/amdgpu/liverpool.h | 11 ++++ .../renderer_vulkan/renderer_vulkan.h | 10 ++- .../texture_cache/texture_cache.cpp | 8 ++- src/video_core/texture_cache/texture_cache.h | 6 +- 8 files changed, 98 insertions(+), 33 deletions(-) diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 97b1816e..25de48a4 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -9,6 +9,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" +#include "core/platform.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" extern std::unique_ptr renderer; @@ -173,14 +174,19 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) 
{ // Update flip status. auto* port = req.port; - auto& flip_status = port->flip_status; - flip_status.count++; - flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); - flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); - flip_status.flipArg = req.flip_arg; - flip_status.currentBuffer = req.index; - flip_status.flipPendingNum = static_cast(requests.size()); + { + std::unique_lock lock{port->port_mutex}; + auto& flip_status = port->flip_status; + flip_status.count++; + flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); + flip_status.tsc = Libraries::Kernel::sceKernelReadTsc(); + flip_status.flipArg = req.flip_arg; + flip_status.currentBuffer = req.index; + if (req.eop) { + --flip_status.gcQueueNum; + } + --flip_status.flipPendingNum; + } // Trigger flip events for the port. for (auto& event : port->flip_events) { @@ -202,34 +208,54 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) { bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop /*= false*/) { + { + std::unique_lock lock{port->port_mutex}; + if (index != -1 && port->flip_status.flipPendingNum >= port->NumRegisteredBuffers()) { + LOG_ERROR(Lib_VideoOut, "Flip queue is full"); + return false; + } + + if (is_eop) { + ++port->flip_status.gcQueueNum; + } + ++port->flip_status.flipPendingNum; // integral GPU and CPU pending flips counter + port->flip_status.submitTsc = Libraries::Kernel::sceKernelReadTsc(); + } + + if (!is_eop) { + // Before processing the flip we need to ask GPU thread to flush command list as at this + // point VO surface is ready to be presented, and we will need to have an actual state of + // Vulkan image at the time of frame presentation.
+ liverpool->SendCommand([=, this]() { + renderer->FlushDraw(); + SubmitFlipInternal(port, index, flip_arg, is_eop); + }); + } else { + SubmitFlipInternal(port, index, flip_arg, is_eop); + } + + return true; +} + +void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, + bool is_eop /*= false*/) { Vulkan::Frame* frame; if (index == -1) { - frame = renderer->PrepareBlankFrame(); + frame = renderer->PrepareBlankFrame(is_eop); } else { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; frame = renderer->PrepareFrame(group, buffer.address_left, is_eop); } - if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) { - LOG_ERROR(Lib_VideoOut, "Flip queue is full"); - return false; - } - std::scoped_lock lock{mutex}; requests.push({ .frame = frame, .port = port, .index = index, .flip_arg = flip_arg, - .submit_tsc = Libraries::Kernel::sceKernelReadTsc(), .eop = is_eop, }); - - port->flip_status.flipPendingNum = static_cast(requests.size()); - port->flip_status.gcQueueNum = 0; - - return true; } void VideoOutDriver::PresentThread(std::stop_token token) { diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 104056de..bee80060 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -29,6 +29,7 @@ struct VideoOutPort { std::vector flip_events; std::vector vblank_events; std::mutex vo_mutex; + std::mutex port_mutex; std::condition_variable vo_cv; std::condition_variable vblank_cv; int flip_rate = 0; @@ -93,7 +94,6 @@ private: VideoOutPort* port; s32 index; s64 flip_arg; - u64 submit_tsc; bool eop; operator bool() const noexcept { @@ -102,6 +102,7 @@ private: }; std::chrono::microseconds Flip(const Request& req); + void SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); void PresentThread(std::stop_token token); std::mutex mutex; diff --git 
a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 15e14662..acfcbad4 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -113,7 +113,9 @@ s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate) { s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle) { LOG_INFO(Lib_VideoOut, "called"); - s32 pending = driver->GetPort(handle)->flip_status.flipPendingNum; + auto* port = driver->GetPort(handle); + std::unique_lock lock{port->port_mutex}; + s32 pending = port->flip_status.flipPendingNum; return pending; } @@ -161,6 +163,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) { return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; } + std::unique_lock lock{port->port_mutex}; *status = port->flip_status; LOG_INFO(Lib_VideoOut, diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index a9665a02..dce2d4b4 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -35,7 +35,7 @@ void Liverpool::Process(std::stop_token stoken) { { std::unique_lock lk{submit_mutex}; Common::CondvarWait(submit_cv, lk, stoken, - [this] { return num_submits != 0 || submit_done; }); + [this] { return num_commands || num_submits || submit_done; }); } if (stoken.stop_requested()) { break; @@ -45,7 +45,23 @@ void Liverpool::Process(std::stop_token stoken) { int qid = -1; - while (num_submits) { + while (num_submits || num_commands) { + + // Process incoming commands with high priority + while (num_commands) { + + Common::UniqueFunction callback{}; + { + std::unique_lock lk{submit_mutex}; + callback = std::move(command_queue.front()); // FIFO: pop() removes the front + command_queue.pop(); + } + + callback(); + + --num_commands; + } + qid = (qid + 1) % NumTotalQueues; auto& queue = mapped_queues[qid]; @@ -219,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span #include #include + #include "common/assert.h" #include
"common/bit_field.h" #include "common/polyfill_thread.h" #include "common/types.h" +#include "common/unique_function.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -1054,6 +1056,13 @@ public: rasterizer = rasterizer_; } + void SendCommand(Common::UniqueFunction&& func) { + std::scoped_lock lk{submit_mutex}; + command_queue.emplace(std::move(func)); + ++num_commands; + submit_cv.notify_one(); + } + private: struct Task { struct promise_type { @@ -1122,9 +1131,11 @@ private: Libraries::VideoOut::VideoOutPort* vo_port{}; std::jthread process_thread{}; std::atomic num_submits{}; + std::atomic num_commands{}; std::atomic submit_done{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; + std::queue> command_queue{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 113b380e..eab9d527 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -48,13 +48,14 @@ public: VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(info); + texture_cache.UpdateImage(image_id, is_eop ? 
nullptr : &flip_scheduler); auto& image = texture_cache.GetImage(image_id); return PrepareFrameInternal(image, is_eop); } - Frame* PrepareBlankFrame() { + Frame* PrepareBlankFrame(bool is_eop) { auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); - return PrepareFrameInternal(image, true); + return PrepareFrameInternal(image, is_eop); } VideoCore::Image& RegisterVideoOutSurface( @@ -75,6 +76,11 @@ public: void Present(Frame* frame); void RecreateFrame(Frame* frame, u32 width, u32 height); + void FlushDraw() { + SubmitInfo info{}; + draw_scheduler.Flush(info); + } + private: Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true); Frame* GetRenderFrame(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 6b14faac..6bc893b0 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -223,7 +223,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, return RegisterImageView(image_id, view_info); } -void TextureCache::RefreshImage(Image& image) { +void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; @@ -269,8 +269,10 @@ void TextureCache::RefreshImage(Image& image) { return; } - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); + auto* sched_ptr = custom_scheduler ? 
custom_scheduler : &scheduler; + sched_ptr->EndRendering(); + + const auto cmdbuf = sched_ptr->CommandBuffer(); image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); const VAddr image_addr = image.info.guest_address; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b3af0ff1..137b6014 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -59,17 +59,17 @@ public: const ImageViewInfo& view_info); /// Updates image contents if it was modified by CPU. - void UpdateImage(ImageId image_id) { + void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) { Image& image = slot_images[image_id]; if (False(image.flags & ImageFlagBits::CpuModified)) { return; } - RefreshImage(image); + RefreshImage(image, custom_scheduler); TrackImage(image, image_id); } /// Reuploads image contents. - void RefreshImage(Image& image); + void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr); /// Retrieves the sampler that matches the provided S# descriptor. [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); From 6cc4a682fdeb4cee15120d05826c0f70681d10fc Mon Sep 17 00:00:00 2001 From: "Daniel R." 
<47796739+polybiusproxy@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:18:46 +0200 Subject: [PATCH 19/23] core/memory: Fix error on virtual queries of reserved regions --- src/core/memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index eed5126c..6d0d581f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -273,10 +273,10 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, std::scoped_lock lk{mutex}; auto it = FindVMA(addr); - if (!it->second.IsMapped() && flags == 1) { + if (it->second.type == VMAType::Free && flags == 1) { it++; } - if (!it->second.IsMapped()) { + if (it->second.type == VMAType::Free) { LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region"); return ORBIS_KERNEL_ERROR_EACCES; } From d332a5e6116491cd36603bae4f6bdb9d2723dd16 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 14 Aug 2024 19:01:17 +0300 Subject: [PATCH 20/23] spirv: Simplify shared memory handling (#427) * spirv: Simplify shared memory handling * spirv: Ignore clip plane * spirv: Fix image offsets * ir_pass: Implement shared memory lowering pass * NVIDIA doesn't like using shared mem in fragment shader and softlocks driver * spirv: Add log for ignoring pos1 --- CMakeLists.txt | 1 + .../spirv/emit_spirv_context_get_set.cpp | 4 + .../backend/spirv/emit_spirv_image.cpp | 102 +++++++++------ .../backend/spirv/emit_spirv_instructions.h | 25 ++-- .../spirv/emit_spirv_shared_memory.cpp | 122 ++---------------- .../backend/spirv/spirv_emit_context.cpp | 34 ----- .../frontend/translate/vector_memory.cpp | 4 +- src/shader_recompiler/ir/ir_emitter.cpp | 18 +-- src/shader_recompiler/ir/ir_emitter.h | 13 +- src/shader_recompiler/ir/microinstruction.cpp | 2 - src/shader_recompiler/ir/opcodes.inc | 18 +-- src/shader_recompiler/ir/passes/ir_passes.h | 1 + .../passes/lower_shared_mem_to_registers.cpp | 39 ++++++ 
.../ir/passes/resource_tracking_pass.cpp | 56 ++++---- .../ir/passes/shader_info_collection_pass.cpp | 12 -- src/shader_recompiler/recompiler.cpp | 3 + src/shader_recompiler/runtime_info.h | 2 - 17 files changed, 182 insertions(+), 274 deletions(-) create mode 100644 src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b92dd932..9153197c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -421,6 +421,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h + src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 5eae058a..02600b94 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -214,6 +214,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { + if (attr == IR::Attribute::Position1) { + LOG_WARNING(Render_Vulkan, "Ignoring pos1 export"); + return; + } const Id pointer{OutputAttrPointer(ctx, attr, element)}; ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 72a60327..5526e541 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -17,113 +17,133 @@ struct 
ImageOperands { operands.push_back(value); } + void AddOffset(EmitContext& ctx, const IR::Value& offset, + bool can_use_runtime_offsets = false) { + if (offset.IsEmpty()) { + return; + } + if (offset.IsImmediate()) { + const s32 operand = offset.U32(); + Add(spv::ImageOperandsMask::ConstOffset, ctx.ConstS32(operand)); + return; + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.ConstS32(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()))); + return; + case IR::Opcode::CompositeConstructU32x3: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.ConstS32(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()), + static_cast(inst->Arg(2).U32()))); + return; + default: + break; + } + } + if (can_use_runtime_offsets) { + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + } else { + LOG_WARNING(Render_Vulkan, + "Runtime offset provided to unsupported image sample instruction"); + } + } + spv::ImageOperandsMask mask{}; boost::container::static_vector operands; }; Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, - Id offset) { + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - if (Sirit::ValidId(bias)) { - operands.Add(spv::ImageOperandsMask::Bias, bias); - } - if (Sirit::ValidId(offset)) { - operands.Add(spv::ImageOperandsMask::Offset, offset); - } + operands.Add(spv::ImageOperandsMask::Bias, bias); + operands.AddOffset(ctx, offset); return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands); } Id 
EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, - Id offset) { + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - if (Sirit::ValidId(lod)) { - operands.Add(spv::ImageOperandsMask::Lod, lod); - } - if (Sirit::ValidId(offset)) { - operands.Add(spv::ImageOperandsMask::Offset, offset); - } + operands.Add(spv::ImageOperandsMask::Lod, lod); + operands.AddOffset(ctx, offset); return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask, operands.operands); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias, Id offset) { + Id bias, const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - if (Sirit::ValidId(bias)) { - operands.Add(spv::ImageOperandsMask::Bias, bias); - } - if (Sirit::ValidId(offset)) { - operands.Add(spv::ImageOperandsMask::Offset, offset); - } + operands.Add(spv::ImageOperandsMask::Bias, bias); + operands.AddOffset(ctx, offset); return ctx.OpImageSampleDrefImplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, operands.operands); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id lod, Id offset) { + Id lod, const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id 
sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - if (Sirit::ValidId(lod)) { - operands.Add(spv::ImageOperandsMask::Lod, lod); - } - if (Sirit::ValidId(offset)) { - operands.Add(spv::ImageOperandsMask::Offset, offset); - } + operands.AddOffset(ctx, offset); + operands.Add(spv::ImageOperandsMask::Lod, lod); return ctx.OpImageSampleDrefExplicitLod(ctx.F32[1], sampled_image, coords, dref, operands.mask, operands.operands); } -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2) { +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); const u32 comp = inst->Flags().gather_comp.Value(); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.AddOffset(ctx, offset); return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask, operands.operands); } -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, - Id offset2, Id dref) { +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset, Id dref) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.Add(spv::ImageOperandsMask::Offset, offset); + operands.AddOffset(ctx, offset); return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask, operands.operands); } -Id 
EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, - Id ms) { +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, + Id lod, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); - if (Sirit::ValidId(lod)) { - return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, - spv::ImageOperandsMask::Lod, lod)); - } else { - return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords)); - } + ImageOperands operands; + operands.AddOffset(ctx, offset); + operands.Add(spv::ImageOperandsMask::Lod, lod); + return ctx.OpBitcast( + ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands)); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 85c6eaac..f868527f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -93,15 +93,9 @@ Id EmitUndefU8(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx); Id EmitUndefU32(EmitContext& ctx); Id EmitUndefU64(EmitContext& ctx); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); Id EmitLoadSharedU32(EmitContext& ctx, Id offset); Id EmitLoadSharedU64(EmitContext& ctx, Id offset); Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void 
EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); @@ -358,18 +352,19 @@ Id EmitConvertU16U32(EmitContext& ctx, Id value); Id EmitConvertU32U16(EmitContext& ctx, Id value); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, - Id offset); + const IR::Value& offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, - Id offset); + const IR::Value& offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id bias, Id offset); + Id bias, const IR::Value& offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, - Id lod, Id offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, - Id offset2, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod, - Id ms); + Id lod, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, + const IR::Value& offset, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, + Id lod, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 1582d9dd..57ea476f 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -5,99 +5,25 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" namespace Shader::Backend::SPIRV { -namespace { -Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { - const Id shift_id{ctx.ConstU32(shift)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); -} -Id Word(EmitContext& ctx, Id offset) { +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { const Id shift_id{ctx.ConstU32(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; return ctx.OpLoad(ctx.U32[1], pointer); } -std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { - const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(3U))}; - const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.ConstU32(mask))}; - const Id count_id{ctx.ConstU32(count)}; - return {bit, count_id}; -} -} // Anonymous namespace - -Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; - return 
ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); - } else { - const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; - return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); - } -} - -Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; - return ctx.OpLoad(ctx.U32[1], pointer); - } else { - return Word(ctx, offset); - } -} - Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; - return ctx.OpLoad(ctx.U32[2], pointer); - } else { - const Id shift_id{ctx.ConstU32(2U)}; - const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; - const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; - const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; - return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), - ctx.OpLoad(ctx.U32[1], rhs_pointer)); - } + const Id shift_id{ctx.ConstU32(2U)}; + const Id 
base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); } Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; - return ctx.OpLoad(ctx.U32[4], pointer); - } const Id shift_id{ctx.ConstU32(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; std::array values{}; @@ -109,35 +35,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { return ctx.OpCompositeConstruct(ctx.U32[4], values); } -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { - const Id pointer{ - ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; - ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); -} - -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { - const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; - ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); -} - void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { - Id pointer{}; - if (ctx.profile.support_explicit_workgroup_layout) { - pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); - } else { - const Id shift{ctx.ConstU32(2U)}; - const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; - pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); - } + const Id shift{ctx.ConstU32(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id pointer = 
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); ctx.OpStore(pointer, value); } void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; - ctx.OpStore(pointer, value); - return; - } const Id shift{ctx.ConstU32(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))}; @@ -148,11 +53,6 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { } void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { - if (ctx.profile.support_explicit_workgroup_layout) { - const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; - ctx.OpStore(pointer, value); - return; - } const Id shift{ctx.ConstU32(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; for (u32 i = 0; i < 4; ++i) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 55754d45..fef0666a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -513,43 +513,9 @@ void EmitContext::DefineSharedMemory() { if (info.shared_memory_size == 0) { info.shared_memory_size = DefaultSharedMemSize; } - const auto make{[&](Id element_type, u32 element_size) { - const u32 num_elements{Common::DivCeil(info.shared_memory_size, element_size)}; - const Id array_type{TypeArray(element_type, ConstU32(num_elements))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); - Decorate(struct_type, spv::Decoration::Block); - - const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; - const Id 
element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; - const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; - Decorate(variable, spv::Decoration::Aliased); - interfaces.push_back(variable); - - return std::make_tuple(variable, element_pointer, pointer); - }}; - if (profile.support_explicit_workgroup_layout) { - AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); - if (info.uses_shared_u8) { - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); - std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); - } - if (info.uses_shared_u16) { - AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); - std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); - } - std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); - std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); - std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); - return; - } const u32 num_elements{Common::DivCeil(info.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], ConstU32(num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); - shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index bb202e42..f708b9fb 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -250,10 +250,10 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { const IR::Value texel = [&]() -> IR::Value { const IR::F32 lod = flags.test(MimgModifier::Level0) ? 
ir.Imm32(0.f) : IR::F32{}; if (!flags.test(MimgModifier::Pcf)) { - return ir.ImageGather(handle, body, offset, {}, info); + return ir.ImageGather(handle, body, offset, info); } ASSERT(mimg.dmask & 1); // should be always 1st (R) component - return ir.ImageGatherDref(handle, body, offset, {}, dref, info); + return ir.ImageGatherDref(handle, body, offset, dref, info); }(); // For gather4 instructions dmask selects which component to read and must have diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 08b7fbbc..3ff347fb 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -259,10 +259,6 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { - case 8: - return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); - case 16: - return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); case 32: return Inst(Opcode::LoadSharedU32, offset); case 64: @@ -276,12 +272,6 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { switch (bit_size) { - case 8: - Inst(Opcode::WriteSharedU8, offset, value); - break; - case 16: - Inst(Opcode::WriteSharedU16, offset, value); - break; case 32: Inst(Opcode::WriteSharedU32, offset, value); break; @@ -1398,13 +1388,13 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body } Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, TextureInstInfo info) { - return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset, offset2); + TextureInstInfo info) { + return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset); } Value IREmitter::ImageGatherDref(const Value& handle, 
const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info) { - return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, offset2, dref); + const F32& dref, TextureInstInfo info) { + return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index fda20639..c226edac 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -256,18 +256,17 @@ public: const F32& dref, const U32& offset, TextureInstInfo info); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - const IR::U1& skip_mips); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - const IR::U1& skip_mips, TextureInstInfo info); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, + const U1& skip_mips); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, + const U1& skip_mips, TextureInstInfo info); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, TextureInstInfo info); + TextureInstInfo info); [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, - const Value& offset, const Value& offset2, const F32& dref, - TextureInstInfo info); + const Value& offset, const F32& dref, TextureInstInfo info); [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, diff --git a/src/shader_recompiler/ir/microinstruction.cpp 
b/src/shader_recompiler/ir/microinstruction.cpp index aa03e3d6..5d413c8a 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -59,8 +59,6 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU128: case Opcode::WriteSharedU64: case Opcode::WriteSharedU32: - case Opcode::WriteSharedU16: - case Opcode::WriteSharedU8: case Opcode::ImageWrite: case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicSMin32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 46918bc3..0e25b777 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -26,15 +26,9 @@ OPCODE(WorkgroupMemoryBarrier, Void, OPCODE(DeviceMemoryBarrier, Void, ) // Shared memory operations -OPCODE(LoadSharedU8, U32, U32, ) -OPCODE(LoadSharedS8, U32, U32, ) -OPCODE(LoadSharedU16, U32, U32, ) -OPCODE(LoadSharedS16, U32, U32, ) OPCODE(LoadSharedU32, U32, U32, ) OPCODE(LoadSharedU64, U32x2, U32, ) OPCODE(LoadSharedU128, U32x4, U32, ) -OPCODE(WriteSharedU8, Void, U32, U32, ) -OPCODE(WriteSharedU16, Void, U32, U32, ) OPCODE(WriteSharedU32, Void, U32, U32, ) OPCODE(WriteSharedU64, Void, U32, U32x2, ) OPCODE(WriteSharedU128, Void, U32, U32x4, ) @@ -298,12 +292,12 @@ OPCODE(ConvertU16U32, U16, U32, OPCODE(ConvertU32U16, U32, U16, ) // Image operations -OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, U32, ) -OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, U32, ) -OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, U32, ) -OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, U32, ) -OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, 
Opaque, F32, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index bf2ba4d6..7e2b962b 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -14,5 +14,6 @@ void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); +void LowerSharedMemToRegisters(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp new file mode 100644 index 00000000..a87cf31b --- /dev/null +++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +void LowerSharedMemToRegisters(IR::Program& program) { + boost::container::small_vector ds_writes; + Info& info{program.info}; + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + const auto opcode = inst.GetOpcode(); + if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) { + ds_writes.emplace_back(&inst); + continue; + } + if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) { + // Search for write instruction with same offset + const IR::Inst* prod 
= inst.Arg(0).InstRecursive(); + const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) { + const IR::Inst* write_prod = write->Arg(0).InstRecursive(); + return write_prod->Arg(1).U32() == prod->Arg(1).U32() && + write_prod->Arg(0) == prod->Arg(0); + }); + ASSERT(it != ds_writes.end()); + // Replace data read with value written. + inst.ReplaceUsesWith((*it)->Arg(1)); + } + } + } + // We should have eliminated everything. Invalidate data write instructions. + for (const auto inst : ds_writes) { + inst->Invalidate(); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index e6d5c48c..b3d2311e 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -171,6 +171,22 @@ bool IsImageStorageInstruction(const IR::Inst& inst) { } } +u32 ImageOffsetArgumentPosition(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: + return 2; + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + return 3; + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + return 4; + default: + UNREACHABLE(); + } +} + class Descriptors { public: explicit Descriptors(Info& info_) @@ -574,33 +590,29 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip if (inst_info.has_offset) { // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. 
- const u32 arg_pos = [&]() -> u32 { - switch (inst.GetOpcode()) { - case IR::Opcode::ImageGather: - case IR::Opcode::ImageGatherDref: - return 2; - case IR::Opcode::ImageSampleExplicitLod: - case IR::Opcode::ImageSampleImplicitLod: - return 3; - case IR::Opcode::ImageSampleDrefExplicitLod: - case IR::Opcode::ImageSampleDrefImplicitLod: - return 4; - default: - break; - } - return inst_info.is_depth ? 4 : 3; - }(); + const u32 arg_pos = ImageOffsetArgumentPosition(inst); const IR::Value arg = inst.Arg(arg_pos); ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); - const auto f = [&](IR::Value value, u32 offset) -> auto { + + const auto read = [&](u32 offset) -> auto { return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true); }; - const auto x = f(arg, 0); - const auto y = f(arg, 8); - const auto z = f(arg, 16); - const IR::Value value = ir.CompositeConstruct(x, y, z); - inst.SetArg(arg_pos, value); + switch (image.GetType()) { + case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color1DArray: + inst.SetArg(arg_pos, read(0)); + break; + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color2DArray: + inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8))); + break; + case AmdGpu::ImageType::Color3D: + inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16))); + break; + default: + UNREACHABLE(); + } } if (inst_info.has_lod_clamp) { diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 7100b384..52087a65 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -16,18 +16,6 @@ void Visit(Info& info, IR::Inst& inst) { info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); break; } - case IR::Opcode::LoadSharedS8: - case IR::Opcode::LoadSharedU8: - case IR::Opcode::WriteSharedU8: - info.uses_shared_u8 = 
true; - info.uses_shared = true; - break; - case IR::Opcode::LoadSharedS16: - case IR::Opcode::LoadSharedU16: - case IR::Opcode::WriteSharedU16: - info.uses_shared_u16 = true; - info.uses_shared = true; - break; case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 69eec50f..0f9fd6d4 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -58,6 +58,9 @@ IR::Program TranslateProgram(Common::ObjectPool& inst_pool, Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + if (program.info.stage != Stage::Compute) { + Shader::Optimization::LowerSharedMemToRegisters(program); + } Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index b936e06a..9b592e12 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -195,8 +195,6 @@ struct Info { bool has_image_query{}; bool uses_group_quad{}; bool uses_shared{}; - bool uses_shared_u8{}; - bool uses_shared_u16{}; bool uses_fp16{}; bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions From 5f963772a07a5dc93b64fd98d3693c125295acfe Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Wed, 14 Aug 2024 11:43:00 -0500 Subject: [PATCH 21/23] scePthreadAttrSetstack implementation (#391) * scePthreadAttrSetstack implementation Used by Final Fantasy XV * Address Comments Verify parameters before calling the pthread_attr_setstack function. Swap uses of SCE prefix with ORBIS prefix. 
* Quick fix Addresses the newest review and appears to fix issues caused in games by my previous commit. --- src/core/libraries/kernel/thread_management.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index cdd729da..6319b7c2 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -1094,6 +1094,19 @@ int PS4_SYSV_ABI scePthreadAttrGetstack(ScePthreadAttr* attr, void** addr, size_ return SCE_KERNEL_ERROR_EINVAL; } +int PS4_SYSV_ABI scePthreadAttrSetstack(ScePthreadAttr* attr, void* addr, size_t size) { + if (attr == nullptr || *attr == nullptr || addr == nullptr || size < 0x4000) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + int result = pthread_attr_setstack(&(*attr)->pth_attr, addr, size); + LOG_INFO(Kernel_Pthread, "scePthreadAttrSetstack: result = {}", result); + + if (result == 0) { + return ORBIS_OK; + } + return ORBIS_KERNEL_ERROR_EINVAL; +} + int PS4_SYSV_ABI scePthreadJoin(ScePthread thread, void** res) { int result = pthread_join(thread->pth, res); LOG_INFO(Kernel_Pthread, "scePthreadJoin result = {}", result); @@ -1550,6 +1563,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("B5GmVDKwpn0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_yield); LIB_FUNCTION("-quPa4SEJUw", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstack); + LIB_FUNCTION("Bvn74vj6oLo", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstack); LIB_FUNCTION("Ru36fiTtJzA", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstackaddr); LIB_FUNCTION("-fA+7ZlGDQs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetstacksize); LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, scePthreadOnce); From 6f4e1a47b9caa534956f2206533151c6cbe23bb8 Mon Sep 17 00:00:00 2001 From: Dzmitry Dubrova Date: Wed, 14 Aug 2024 21:37:05 +0300 Subject: [PATCH 22/23] core: misc 
changes (#430) * core: misc changes * video_core: add some formats for detiling * clang format --- .../libraries/kernel/memory_management.cpp | 13 +++++++++++-- src/core/libraries/np_manager/np_manager.cpp | 7 +++++-- src/core/libraries/np_manager/np_manager.h | 18 +++++++++++++++++- .../backend/spirv/spirv_emit_context.cpp | 4 ++++ .../renderer_vulkan/liverpool_to_vk.cpp | 4 ++++ .../renderer_vulkan/vk_swapchain.cpp | 1 + src/video_core/texture_cache/tile_manager.cpp | 2 ++ 7 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index 54c5860f..826d4797 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -74,13 +74,22 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE size_t* sizeOut) { LOG_WARNING(Kernel_Vmm, "called searchStart = {:#x}, searchEnd = {:#x}, alignment = {:#x}", searchStart, searchEnd, alignment); + + if (searchEnd <= searchStart) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + if (searchEnd > SCE_KERNEL_MAIN_DMEM_SIZE) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + auto* memory = Core::Memory::Instance(); PAddr physAddr; - s32 size = memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut); + s32 result = + memory->DirectQueryAvailable(searchStart, searchEnd, alignment, &physAddr, sizeOut); *physAddrOut = static_cast(physAddr); - return size; + return result; } s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, diff --git a/src/core/libraries/np_manager/np_manager.cpp b/src/core/libraries/np_manager/np_manager.cpp index 33308abc..fd4e31f5 100644 --- a/src/core/libraries/np_manager/np_manager.cpp +++ b/src/core/libraries/np_manager/np_manager.cpp @@ -974,8 +974,11 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() { return ORBIS_OK; } -int PS4_SYSV_ABI sceNpGetNpId() { - 
LOG_ERROR(Lib_NpManager, "(STUBBED) called"); +int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId) { + LOG_ERROR(Lib_NpManager, "(DUMMY) called"); + + std::string name = "shadps4"; + strcpy(npId->handle.data, name.c_str()); return ORBIS_OK; } diff --git a/src/core/libraries/np_manager/np_manager.h b/src/core/libraries/np_manager/np_manager.h index 5b11355a..5955a40b 100644 --- a/src/core/libraries/np_manager/np_manager.h +++ b/src/core/libraries/np_manager/np_manager.h @@ -11,6 +11,22 @@ class SymbolsResolver; namespace Libraries::NpManager { +constexpr int ORBIS_NP_ONLINEID_MAX_LENGTH = 16; + +typedef int OrbisUserServiceUserId; + +struct OrbisNpOnlineId { + char data[ORBIS_NP_ONLINEID_MAX_LENGTH]; + char term; + char dummy[3]; +}; + +struct OrbisNpId { + OrbisNpOnlineId handle; + u8 opt[8]; + u8 reserved[8]; +}; + int PS4_SYSV_ABI Func_EF4378573542A508(); int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromKernel(); int PS4_SYSV_ABI _sceNpIpcCreateMemoryFromPool(); @@ -204,7 +220,7 @@ int PS4_SYSV_ABI sceNpGetAccountLanguage2(); int PS4_SYSV_ABI sceNpGetAccountLanguageA(); int PS4_SYSV_ABI sceNpGetGamePresenceStatus(); int PS4_SYSV_ABI sceNpGetGamePresenceStatusA(); -int PS4_SYSV_ABI sceNpGetNpId(); +int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId userId, OrbisNpId* npId); int PS4_SYSV_ABI sceNpGetNpReachabilityState(); int PS4_SYSV_ABI sceNpGetOnlineId(); int PS4_SYSV_ABI sceNpGetParentalControlInfo(); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index fef0666a..4b732ecd 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -407,6 +407,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::Rgba16f; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 && + 
image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { + return spv::ImageFormat::Rgba16; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 && image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { return spv::ImageFormat::R8; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 04e830c0..4fc32ab2 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -341,6 +341,7 @@ std::span GetAllFormats() { vk::Format::eR32Sint, vk::Format::eR32Uint, vk::Format::eBc6HUfloatBlock, + vk::Format::eBc6HSfloatBlock, vk::Format::eR16G16Unorm, vk::Format::eR16G16B16A16Sscaled, vk::Format::eR16G16Sscaled, @@ -542,6 +543,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc6HUfloatBlock; } + if (data_format == AmdGpu::DataFormat::FormatBc6 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eBc6HSfloatBlock; + } if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR8G8B8A8Sint; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 16d5c237..dcc19bf3 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -93,6 +93,7 @@ bool Swapchain::AcquireNextImage() { case vk::Result::eSuboptimalKHR: case vk::Result::eErrorSurfaceLostKHR: case vk::Result::eErrorOutOfDateKHR: + case vk::Result::eErrorUnknown: needs_recreation = true; break; default: diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 6447fde1..f08f2094 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -186,6 
+186,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR32Sfloat: case vk::Format::eR32Uint: case vk::Format::eR16G16Sfloat: + case vk::Format::eR16G16Unorm: return vk::Format::eR32Uint; case vk::Format::eBc1RgbaSrgbBlock: case vk::Format::eBc1RgbaUnormBlock: @@ -193,6 +194,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR32G32Sfloat: case vk::Format::eR32G32Uint: case vk::Format::eR16G16B16A16Unorm: + case vk::Format::eR16G16B16A16Uint: case vk::Format::eR16G16B16A16Sfloat: return vk::Format::eR32G32Uint; case vk::Format::eBc2SrgbBlock: From 9adc6382201abf609bebb69e841a3de80b15554e Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Thu, 15 Aug 2024 00:15:07 +0200 Subject: [PATCH 23/23] shader_recompiler: basic implementation of `BUFFER_STORE_FORMAT_` (#431) * shader_recompiler: basic implementation of buffer store w\ fmt conversion * added `Format16` dfmt --- .../spirv/emit_spirv_context_get_set.cpp | 92 +++++++++++++++++++ .../backend/spirv/emit_spirv_instructions.h | 4 + .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_memory.cpp | 35 +++++-- src/shader_recompiler/ir/ir_emitter.cpp | 20 ++++ src/shader_recompiler/ir/ir_emitter.h | 2 + src/shader_recompiler/ir/microinstruction.cpp | 4 + src/shader_recompiler/ir/opcodes.inc | 4 + .../ir/passes/resource_tracking_pass.cpp | 12 +++ 9 files changed, 165 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 02600b94..bbf259fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -467,4 +467,96 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address EmitStoreBufferF32xN<1>(ctx, handle, address, value); } +static Id 
ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, + u32 bit_width) { + switch (format) { + case AmdGpu::NumberFormat::Unorm: + return ctx.OpConvertFToU( + ctx.U32[1], ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))))); + case AmdGpu::NumberFormat::Uint: + return ctx.OpBitcast(ctx.U32[1], value); + case AmdGpu::NumberFormat::Float: + return value; + default: + UNREACHABLE_MSG("Unsupported number fromat for conversion: {}", + magic_enum::enum_name(format)); + } +} + +template +static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { + auto& buffer = ctx.buffers[handle]; + const auto format = buffer.buffer.GetDataFmt(); + const auto num_format = buffer.buffer.GetNumberFmt(); + + switch (format) { + case AmdGpu::DataFormat::FormatInvalid: + return; + case AmdGpu::DataFormat::Format8_8_8_8: + case AmdGpu::DataFormat::Format16: + case AmdGpu::DataFormat::Format32: + case AmdGpu::DataFormat::Format32_32_32_32: { + ASSERT(N == AmdGpu::NumComponents(format)); + + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); + const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index); + + Id packed_value{}; + for (u32 i = 0; i < N; i++) { + const u32 bit_width = AmdGpu::ComponentBits(format, i); + const u32 bit_offset = AmdGpu::ComponentOffset(format, i) % 32; + + const Id comp{ConvertF32ToFormat( + ctx, N == 1 ? 
value : ctx.OpCompositeExtract(ctx.F32[1], value, i), num_format, + bit_width)}; + + if (bit_width == 32) { + if constexpr (N == 1) { + ctx.OpStore(ptr, comp); + } else { + const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, + ctx.u32_zero_value, index_i); + ctx.OpStore(ptr, comp); + } + } else { + if (i == 0) { + packed_value = comp; + } else { + packed_value = + ctx.OpBitFieldInsert(ctx.U32[1], packed_value, comp, + ctx.ConstU32(bit_offset), ctx.ConstU32(bit_width)); + } + + if (i == N - 1) { + ctx.OpStore(ptr, packed_value); + } + } + } + } break; + default: + UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format)); + } +} + +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value); +} + +void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, + Id value) { + EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index f868527f..8a0fcd4b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -76,6 +76,10 @@ void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void 
EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2e12209d..9ebcb116 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -186,7 +186,7 @@ public: // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); - void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); + void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index f708b9fb..63f6c3b4 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -53,6 +53,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_GET_RESINFO: return IMAGE_GET_RESINFO(inst); + // Buffer load operations case Opcode::TBUFFER_LOAD_FORMAT_X: return 
BUFFER_LOAD_FORMAT(1, true, true, inst); case Opcode::TBUFFER_LOAD_FORMAT_XY: @@ -61,6 +62,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_LOAD_FORMAT(3, true, true, inst); case Opcode::TBUFFER_LOAD_FORMAT_XYZW: return BUFFER_LOAD_FORMAT(4, true, true, inst); + case Opcode::BUFFER_LOAD_FORMAT_X: return BUFFER_LOAD_FORMAT(1, false, true, inst); case Opcode::BUFFER_LOAD_FORMAT_XY: @@ -69,6 +71,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_LOAD_FORMAT(3, false, true, inst); case Opcode::BUFFER_LOAD_FORMAT_XYZW: return BUFFER_LOAD_FORMAT(4, false, true, inst); + case Opcode::BUFFER_LOAD_DWORD: return BUFFER_LOAD_FORMAT(1, false, false, inst); case Opcode::BUFFER_LOAD_DWORDX2: @@ -77,16 +80,25 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_LOAD_FORMAT(3, false, false, inst); case Opcode::BUFFER_LOAD_DWORDX4: return BUFFER_LOAD_FORMAT(4, false, false, inst); + + // Buffer store operations case Opcode::BUFFER_STORE_FORMAT_X: - case Opcode::BUFFER_STORE_DWORD: - return BUFFER_STORE_FORMAT(1, false, inst); - case Opcode::BUFFER_STORE_DWORDX2: - return BUFFER_STORE_FORMAT(2, false, inst); - case Opcode::BUFFER_STORE_DWORDX3: - return BUFFER_STORE_FORMAT(3, false, inst); + return BUFFER_STORE_FORMAT(1, false, true, inst); + case Opcode::BUFFER_STORE_FORMAT_XY: + return BUFFER_STORE_FORMAT(2, false, true, inst); + case Opcode::BUFFER_STORE_FORMAT_XYZ: + return BUFFER_STORE_FORMAT(3, false, true, inst); case Opcode::BUFFER_STORE_FORMAT_XYZW: + return BUFFER_STORE_FORMAT(4, false, true, inst); + + case Opcode::BUFFER_STORE_DWORD: + return BUFFER_STORE_FORMAT(1, false, false, inst); + case Opcode::BUFFER_STORE_DWORDX2: + return BUFFER_STORE_FORMAT(2, false, false, inst); + case Opcode::BUFFER_STORE_DWORDX3: + return BUFFER_STORE_FORMAT(3, false, false, inst); case Opcode::BUFFER_STORE_DWORDX4: - return BUFFER_STORE_FORMAT(4, false, inst); + return BUFFER_STORE_FORMAT(4, false, false, inst); 
default: LogMissingOpcode(inst); } @@ -359,7 +371,8 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma } } -void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) { +void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, + const GcnInst& inst) { const auto& mtbuf = inst.control.mtbuf; const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; @@ -410,7 +423,11 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - ir.StoreBuffer(num_dwords, handle, address, value, info); + if (is_format) { + ir.StoreBufferFormat(num_dwords, handle, address, value, info); + } else { + ir.StoreBuffer(num_dwords, handle, address, value, info); + } } void Translator::IMAGE_GET_LOD(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 3ff347fb..4271ac35 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -347,6 +347,26 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad } } +void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, + const Value& data, BufferInstInfo info) { + switch (num_dwords) { + case 1: + Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data); + break; + case 2: + Inst(Opcode::StoreBufferFormatF32x2, Flags{info}, handle, address, data); + break; + case 3: + Inst(Opcode::StoreBufferFormatF32x3, Flags{info}, handle, address, data); + break; + case 4: + Inst(Opcode::StoreBufferFormatF32x4, Flags{info}, handle, address, data); + break; + default: + UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); + } +} + U32 IREmitter::LaneId() { return 
Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index c226edac..59ced93e 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -93,6 +93,8 @@ public: BufferInstInfo info); void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info); + void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, + const Value& data, BufferInstInfo info); [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 WarpId(); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 5d413c8a..a8166125 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -55,6 +55,10 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::StoreBufferF32x2: case Opcode::StoreBufferF32x3: case Opcode::StoreBufferF32x4: + case Opcode::StoreBufferFormatF32: + case Opcode::StoreBufferFormatF32x2: + case Opcode::StoreBufferFormatF32x3: + case Opcode::StoreBufferFormatF32x4: case Opcode::StoreBufferU32: case Opcode::WriteSharedU128: case Opcode::WriteSharedU64: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 0e25b777..4c6122a8 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -82,6 +82,10 @@ OPCODE(StoreBufferF32, Void, Opaq OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32, ) +OPCODE(StoreBufferFormatF32x2, Void, Opaque, Opaque, F32x2, ) +OPCODE(StoreBufferFormatF32x3, Void, Opaque, Opaque, F32x3, ) +OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) // Vector utility diff --git 
a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index b3d2311e..97fc5b99 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -37,6 +37,10 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::StoreBufferF32x2: case IR::Opcode::StoreBufferF32x3: case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: case IR::Opcode::StoreBufferU32: return true; default: @@ -73,6 +77,10 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { case IR::Opcode::LoadBufferFormatF32x2: case IR::Opcode::LoadBufferFormatF32x3: case IR::Opcode::LoadBufferFormatF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: switch (num_format) { case AmdGpu::NumberFormat::Unorm: case AmdGpu::NumberFormat::Snorm: @@ -112,6 +120,10 @@ bool IsBufferStore(const IR::Inst& inst) { case IR::Opcode::StoreBufferF32x2: case IR::Opcode::StoreBufferF32x3: case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: case IR::Opcode::StoreBufferU32: return true; default: