From 8730968385a8a6d2532e659c220d33dff4e16fac Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 22 May 2024 01:35:12 +0300 Subject: [PATCH] video: Import new shader recompiler + display a triangle (#142) --- .gitmodules | 3 + CMakeLists.txt | 84 +- externals/CMakeLists.txt | 3 + externals/boost | 2 +- externals/sirit | 1 + src/common/assert.cpp | 1 + src/common/enum.h | 101 + src/common/func_traits.h | 34 + src/core/libraries/gnmdriver/gnmdriver.cpp | 4 + src/core/libraries/videoout/driver.cpp | 4 +- src/core/libraries/videoout/driver.h | 4 +- src/core/platform.h | 4 +- src/shader_recompiler/backend/bindings.h | 18 + .../backend/spirv/emit_spirv.cpp | 285 ++ .../backend/spirv/emit_spirv.h | 21 + .../spirv/emit_spirv_bitwise_conversion.cpp | 57 + .../backend/spirv/emit_spirv_composite.cpp | 153 + .../spirv/emit_spirv_context_get_set.cpp | 103 + .../backend/spirv/emit_spirv_convert.cpp | 262 ++ .../spirv/emit_spirv_floating_point.cpp | 355 ++ .../backend/spirv/emit_spirv_image.cpp | 66 + .../backend/spirv/emit_spirv_instructions.h | 335 ++ .../backend/spirv/emit_spirv_integer.cpp | 262 ++ .../backend/spirv/emit_spirv_logical.cpp | 25 + .../backend/spirv/emit_spirv_select.cpp | 41 + .../backend/spirv/emit_spirv_special.cpp | 21 + .../backend/spirv/emit_spirv_undefined.cpp | 29 + .../backend/spirv/spirv_emit_context.cpp | 136 + .../backend/spirv/spirv_emit_context.h | 169 + src/shader_recompiler/exception.h | 64 + .../frontend/control_flow_graph.cpp | 209 + .../frontend/control_flow_graph.h | 66 + src/shader_recompiler/frontend/decode.cpp | 1097 +++++ src/shader_recompiler/frontend/decode.h | 97 + src/shader_recompiler/frontend/format.cpp | 3733 +++++++++++++++++ .../frontend/instruction.cpp | 50 + src/shader_recompiler/frontend/instruction.h | 208 + src/shader_recompiler/frontend/module.h | 10 + src/shader_recompiler/frontend/opcodes.h | 2494 +++++++++++ .../frontend/structured_control_flow.cpp | 829 ++++ .../frontend/structured_control_flow.h | 22 + .../frontend/translate/data_share.cpp | 44 + .../frontend/translate/export.cpp | 49 + .../frontend/translate/flat_memory.cpp | 0 .../frontend/translate/scalar_alu.cpp | 38 + .../frontend/translate/scalar_memory.cpp | 45 + .../frontend/translate/translate.cpp | 152 + .../frontend/translate/translate.h | 73 + .../frontend/translate/vector_alu.cpp | 65 + .../translate/vector_interpolation.cpp | 14 + .../frontend/translate/vector_memory.cpp | 103 + .../ir/abstract_syntax_list.h | 56 + src/shader_recompiler/ir/attribute.cpp | 115 + src/shader_recompiler/ir/attribute.h | 105 + src/shader_recompiler/ir/basic_block.cpp | 149 + src/shader_recompiler/ir/basic_block.h | 180 + src/shader_recompiler/ir/condition.h | 50 + src/shader_recompiler/ir/ir_emitter.cpp | 1129 +++++ src/shader_recompiler/ir/ir_emitter.h | 250 ++ src/shader_recompiler/ir/microinstruction.cpp | 167 + src/shader_recompiler/ir/opcodes.cpp | 12 + src/shader_recompiler/ir/opcodes.h | 107 + src/shader_recompiler/ir/opcodes.inc | 247 ++ .../ir/passes/constant_propogation_pass.cpp | 403 ++ src/shader_recompiler/ir/passes/passes.h | 16 + .../ir/passes/resource_tracking_pass.cpp | 131 + .../ir/passes/ssa_rewrite_pass.cpp | 408 ++ src/shader_recompiler/ir/post_order.cpp | 42 + src/shader_recompiler/ir/post_order.h | 13 + src/shader_recompiler/ir/program.cpp | 31 + src/shader_recompiler/ir/program.h | 28 + src/shader_recompiler/ir/reg.h | 471 +++ src/shader_recompiler/ir/type.cpp | 36 + src/shader_recompiler/ir/type.h | 56 + 
src/shader_recompiler/ir/value.cpp | 93 + src/shader_recompiler/ir/value.h | 353 ++ src/shader_recompiler/object_pool.h | 107 + src/shader_recompiler/profile.h | 69 + src/shader_recompiler/recompiler.cpp | 72 + src/shader_recompiler/recompiler.h | 33 + src/shader_recompiler/runtime_info.h | 139 + src/video_core/amdgpu/liverpool.cpp | 3 +- src/video_core/amdgpu/liverpool.h | 14 +- .../renderer_vulkan/liverpool_to_vk.cpp | 113 + .../renderer_vulkan/liverpool_to_vk.h | 23 + .../renderer_vulkan/renderer_vulkan.cpp | 4 +- .../renderer_vulkan/renderer_vulkan.h | 9 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 162 + .../renderer_vulkan/vk_graphics_pipeline.h | 46 + .../renderer_vulkan/vk_instance.cpp | 6 + .../renderer_vulkan/vk_pipeline_cache.cpp | 70 + .../renderer_vulkan/vk_pipeline_cache.h | 40 + .../renderer_vulkan/vk_rasterizer.cpp | 99 + .../renderer_vulkan/vk_rasterizer.h | 51 + src/video_core/texture_cache/image.cpp | 2 +- src/video_core/texture_cache/image.h | 28 +- src/video_core/texture_cache/image_view.h | 15 +- src/video_core/texture_cache/slot_vector.h | 4 - .../texture_cache/texture_cache.cpp | 32 + src/video_core/texture_cache/texture_cache.h | 5 + src/video_core/texture_cache/types.h | 6 + src/vulkan_util.cpp | 626 --- src/vulkan_util.h | 51 - 103 files changed, 17793 insertions(+), 729 deletions(-) create mode 160000 externals/sirit create mode 100644 src/common/func_traits.h create mode 100644 src/shader_recompiler/backend/bindings.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_image.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_select.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_special.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp create mode 100644 src/shader_recompiler/backend/spirv/spirv_emit_context.cpp create mode 100644 src/shader_recompiler/backend/spirv/spirv_emit_context.h create mode 100644 src/shader_recompiler/exception.h create mode 100644 src/shader_recompiler/frontend/control_flow_graph.cpp create mode 100644 src/shader_recompiler/frontend/control_flow_graph.h create mode 100644 src/shader_recompiler/frontend/decode.cpp create mode 100644 src/shader_recompiler/frontend/decode.h create mode 100644 src/shader_recompiler/frontend/format.cpp create mode 100644 src/shader_recompiler/frontend/instruction.cpp create mode 100644 src/shader_recompiler/frontend/instruction.h create mode 100644 src/shader_recompiler/frontend/module.h create mode 100644 src/shader_recompiler/frontend/opcodes.h create mode 100644 src/shader_recompiler/frontend/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/structured_control_flow.h create mode 100644 
src/shader_recompiler/frontend/translate/data_share.cpp create mode 100644 src/shader_recompiler/frontend/translate/export.cpp create mode 100644 src/shader_recompiler/frontend/translate/flat_memory.cpp create mode 100644 src/shader_recompiler/frontend/translate/scalar_alu.cpp create mode 100644 src/shader_recompiler/frontend/translate/scalar_memory.cpp create mode 100644 src/shader_recompiler/frontend/translate/translate.cpp create mode 100644 src/shader_recompiler/frontend/translate/translate.h create mode 100644 src/shader_recompiler/frontend/translate/vector_alu.cpp create mode 100644 src/shader_recompiler/frontend/translate/vector_interpolation.cpp create mode 100644 src/shader_recompiler/frontend/translate/vector_memory.cpp create mode 100644 src/shader_recompiler/ir/abstract_syntax_list.h create mode 100644 src/shader_recompiler/ir/attribute.cpp create mode 100644 src/shader_recompiler/ir/attribute.h create mode 100644 src/shader_recompiler/ir/basic_block.cpp create mode 100644 src/shader_recompiler/ir/basic_block.h create mode 100644 src/shader_recompiler/ir/condition.h create mode 100644 src/shader_recompiler/ir/ir_emitter.cpp create mode 100644 src/shader_recompiler/ir/ir_emitter.h create mode 100644 src/shader_recompiler/ir/microinstruction.cpp create mode 100644 src/shader_recompiler/ir/opcodes.cpp create mode 100644 src/shader_recompiler/ir/opcodes.h create mode 100644 src/shader_recompiler/ir/opcodes.inc create mode 100644 src/shader_recompiler/ir/passes/constant_propogation_pass.cpp create mode 100644 src/shader_recompiler/ir/passes/passes.h create mode 100644 src/shader_recompiler/ir/passes/resource_tracking_pass.cpp create mode 100644 src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp create mode 100644 src/shader_recompiler/ir/post_order.cpp create mode 100644 src/shader_recompiler/ir/post_order.h create mode 100644 src/shader_recompiler/ir/program.cpp create mode 100644 src/shader_recompiler/ir/program.h create mode 100644 src/shader_recompiler/ir/reg.h create mode 100644 src/shader_recompiler/ir/type.cpp create mode 100644 src/shader_recompiler/ir/type.h create mode 100644 src/shader_recompiler/ir/value.cpp create mode 100644 src/shader_recompiler/ir/value.h create mode 100644 src/shader_recompiler/object_pool.h create mode 100644 src/shader_recompiler/profile.h create mode 100644 src/shader_recompiler/recompiler.cpp create mode 100644 src/shader_recompiler/recompiler.h create mode 100644 src/shader_recompiler/runtime_info.h create mode 100644 src/video_core/renderer_vulkan/liverpool_to_vk.cpp create mode 100644 src/video_core/renderer_vulkan/liverpool_to_vk.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_cache.h create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.cpp create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.h delete mode 100644 src/vulkan_util.cpp delete mode 100644 src/vulkan_util.h diff --git a/.gitmodules b/.gitmodules index 2451ff6b..76ca5dca 100644 --- a/.gitmodules +++ b/.gitmodules @@ -56,3 +56,6 @@ [submodule "externals/zydis"] path = externals/zydis url = https://github.com/zyantific/zydis.git +[submodule "externals/sirit"] + path = externals/sirit + url = https://github.com/raphaelthegreat/sirit diff --git a/CMakeLists.txt b/CMakeLists.txt index 549bcf40..dae1e4f2 100644 --- a/CMakeLists.txt 
+++ b/CMakeLists.txt @@ -218,6 +218,8 @@ set(COMMON src/common/logging/backend.cpp src/common/io_file.h src/common/error.cpp src/common/error.h + src/common/scope_exit.h + src/common/func_traits.h src/common/native_clock.cpp src/common/native_clock.h src/common/path_util.cpp @@ -286,6 +288,75 @@ set(CORE src/core/aerolib/stubs.cpp src/core/virtual_memory.h ) +set(SHADER_RECOMPILER src/shader_recompiler/exception.h + src/shader_recompiler/object_pool.h + src/shader_recompiler/profile.h + src/shader_recompiler/recompiler.cpp + src/shader_recompiler/recompiler.h + src/shader_recompiler/runtime_info.h + src/shader_recompiler/backend/spirv/emit_spirv.cpp + src/shader_recompiler/backend/spirv/emit_spirv.h + src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp + src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp + src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp + src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp + src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp + src/shader_recompiler/backend/spirv/emit_spirv_image.cpp + src/shader_recompiler/backend/spirv/emit_spirv_instructions.h + src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp + src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp + src/shader_recompiler/backend/spirv/emit_spirv_select.cpp + src/shader_recompiler/backend/spirv/emit_spirv_special.cpp + src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp + src/shader_recompiler/backend/spirv/spirv_emit_context.cpp + src/shader_recompiler/backend/spirv/spirv_emit_context.h + src/shader_recompiler/frontend/translate/data_share.cpp + src/shader_recompiler/frontend/translate/export.cpp + src/shader_recompiler/frontend/translate/flat_memory.cpp + src/shader_recompiler/frontend/translate/scalar_alu.cpp + src/shader_recompiler/frontend/translate/scalar_memory.cpp + src/shader_recompiler/frontend/translate/translate.cpp + src/shader_recompiler/frontend/translate/translate.h + src/shader_recompiler/frontend/translate/vector_alu.cpp + src/shader_recompiler/frontend/translate/vector_interpolation.cpp + src/shader_recompiler/frontend/translate/vector_memory.cpp + src/shader_recompiler/frontend/control_flow_graph.cpp + src/shader_recompiler/frontend/control_flow_graph.h + src/shader_recompiler/frontend/decode.cpp + src/shader_recompiler/frontend/decode.h + src/shader_recompiler/frontend/format.cpp + src/shader_recompiler/frontend/instruction.cpp + src/shader_recompiler/frontend/instruction.h + src/shader_recompiler/frontend/opcodes.h + src/shader_recompiler/frontend/structured_control_flow.cpp + src/shader_recompiler/frontend/structured_control_flow.h + src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp + src/shader_recompiler/ir/passes/resource_tracking_pass.cpp + src/shader_recompiler/ir/passes/constant_propogation_pass.cpp + src/shader_recompiler/ir/passes/passes.h + src/shader_recompiler/ir/abstract_syntax_list.h + src/shader_recompiler/ir/attribute.cpp + src/shader_recompiler/ir/attribute.h + src/shader_recompiler/ir/basic_block.cpp + src/shader_recompiler/ir/basic_block.h + src/shader_recompiler/ir/condition.h + src/shader_recompiler/ir/ir_emitter.cpp + src/shader_recompiler/ir/ir_emitter.h + src/shader_recompiler/ir/microinstruction.cpp + src/shader_recompiler/ir/opcodes.cpp + src/shader_recompiler/ir/opcodes.h + src/shader_recompiler/ir/opcodes.inc + src/shader_recompiler/ir/post_order.cpp + src/shader_recompiler/ir/post_order.h + src/shader_recompiler/ir/program.cpp + 
src/shader_recompiler/ir/program.h + src/shader_recompiler/ir/reg.h + src/shader_recompiler/ir/type.cpp + src/shader_recompiler/ir/type.h + src/shader_recompiler/ir/value.cpp + src/shader_recompiler/ir/value.h +) + set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/liverpool.h src/video_core/amdgpu/pixel_format.cpp @@ -293,18 +364,26 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/pm4_cmds.h src/video_core/amdgpu/pm4_opcodes.h src/video_core/amdgpu/resource.h + src/video_core/renderer_vulkan/liverpool_to_vk.cpp + src/video_core/renderer_vulkan/liverpool_to_vk.h src/video_core/renderer_vulkan/renderer_vulkan.cpp src/video_core/renderer_vulkan/renderer_vulkan.h src/video_core/renderer_vulkan/vk_common.cpp src/video_core/renderer_vulkan/vk_common.h src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp src/video_core/renderer_vulkan/vk_descriptor_update_queue.h + src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp + src/video_core/renderer_vulkan/vk_graphics_pipeline.h src/video_core/renderer_vulkan/vk_instance.cpp src/video_core/renderer_vulkan/vk_instance.h src/video_core/renderer_vulkan/vk_master_semaphore.cpp src/video_core/renderer_vulkan/vk_master_semaphore.h + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp + src/video_core/renderer_vulkan/vk_pipeline_cache.h src/video_core/renderer_vulkan/vk_platform.cpp src/video_core/renderer_vulkan/vk_platform.h + src/video_core/renderer_vulkan/vk_rasterizer.cpp + src/video_core/renderer_vulkan/vk_rasterizer.h src/video_core/renderer_vulkan/vk_resource_pool.cpp src/video_core/renderer_vulkan/vk_resource_pool.h src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -371,6 +450,7 @@ if (ENABLE_QT_GUI) ${QT_GUI} ${COMMON} ${CORE} + ${SHADER_RECOMPILER} ${VIDEO_CORE} src/sdl_window.h src/sdl_window.cpp @@ -381,18 +461,18 @@ else() ${INPUT} ${COMMON} ${CORE} + ${SHADER_RECOMPILER} ${VIDEO_CORE} src/main.cpp src/sdl_window.h src/sdl_window.cpp - src/common/scope_exit.h ) endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak) -target_link_libraries(shadps4 PRIVATE discord-rpc boost vma vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared) +target_link_libraries(shadps4 PRIVATE discord-rpc boost vma sirit vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared) if (NOT ENABLE_QT_GUI) target_link_libraries(shadps4 PRIVATE SDL3-shared) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 649df9ca..4a285f48 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -87,3 +87,6 @@ if (WIN32) add_subdirectory(winpthreads EXCLUDE_FROM_ALL) target_include_directories(winpthreads INTERFACE winpthreads/include) endif() + +# sirit +add_subdirectory(sirit EXCLUDE_FROM_ALL) diff --git a/externals/boost b/externals/boost index 2dd57a94..87b78171 160000 --- a/externals/boost +++ b/externals/boost @@ -1 +1 @@ -Subproject commit 2dd57a940b6d1b733cbd1abbc3f842da476d3d48 +Subproject commit 87b7817119982e8ad6068855fae31b11590514be diff --git a/externals/sirit b/externals/sirit new file mode 160000 index 00000000..9c12a07e --- /dev/null +++ b/externals/sirit @@ -0,0 +1 @@ +Subproject commit 9c12a07e62dfa404727e7fc85dd83bba84cc830d diff --git a/src/common/assert.cpp b/src/common/assert.cpp index 199f40e4..3a49c939 100644 --- a/src/common/assert.cpp +++ b/src/common/assert.cpp @@ -14,6 +14,7 @@ void assert_fail_impl() { [[noreturn]] void unreachable_impl() { Common::Log::Stop(); + 
std::fflush(stdout);
     Crash();
     throw std::runtime_error("Unreachable code");
 }
diff --git a/src/common/enum.h b/src/common/enum.h
index 1c1b5d40..a2f08772 100644
--- a/src/common/enum.h
+++ b/src/common/enum.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <type_traits>
+#include "common/types.h"
 
 #define DECLARE_ENUM_FLAG_OPERATORS(type)                                                          \
     [[nodiscard]] constexpr type operator|(type a, type b) noexcept {                              \
@@ -58,3 +59,103 @@ using T = std::underlying_type_t<type>;                                        \
     return static_cast<T>(key) == 0;                                                              \
 }
+
+namespace Common {
+
+template <typename T>
+class Flags {
+public:
+    using IntType = std::underlying_type_t<T>;
+
+    Flags() {}
+
+    Flags(IntType t) : m_bits(t) {}
+
+    template <typename... Tx>
+    Flags(T f, Tx... fx) {
+        this->set(f, fx...);
+    }
+
+    template <typename... Tx>
+    void set(Tx... fx) {
+        m_bits |= bits(fx...);
+    }
+
+    void set(Flags flags) {
+        m_bits |= flags.m_bits;
+    }
+
+    template <typename... Tx>
+    void clr(Tx... fx) {
+        m_bits &= ~bits(fx...);
+    }
+
+    void clr(Flags flags) {
+        m_bits &= ~flags.m_bits;
+    }
+
+    template <typename... Tx>
+    bool any(Tx... fx) const {
+        return (m_bits & bits(fx...)) != 0;
+    }
+
+    template <typename... Tx>
+    bool all(Tx... fx) const {
+        const IntType mask = bits(fx...);
+        return (m_bits & mask) == mask;
+    }
+
+    bool test(T f) const {
+        return this->any(f);
+    }
+
+    bool isClear() const {
+        return m_bits == 0;
+    }
+
+    void clrAll() {
+        m_bits = 0;
+    }
+
+    u32 raw() const {
+        return m_bits;
+    }
+
+    Flags operator&(const Flags& other) const {
+        return Flags(m_bits & other.m_bits);
+    }
+
+    Flags operator|(const Flags& other) const {
+        return Flags(m_bits | other.m_bits);
+    }
+
+    Flags operator^(const Flags& other) const {
+        return Flags(m_bits ^ other.m_bits);
+    }
+
+    bool operator==(const Flags& other) const {
+        return m_bits == other.m_bits;
+    }
+
+    bool operator!=(const Flags& other) const {
+        return m_bits != other.m_bits;
+    }
+
+private:
+    IntType m_bits = 0;
+
+    static IntType bit(T f) {
+        return IntType(1) << static_cast<u32>(f);
+    }
+
+    template <typename... Tx>
+    static IntType bits(T f, Tx... fx) {
+        return bit(f) | bits(fx...);
+    }
+
+    static IntType bits() {
+        return 0;
+    }
+};
+
+} // namespace Common
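A quick illustration of how the Flags helper above is meant to be used; note that enumerator values act as bit positions, not masks, because bit() shifts 1 by the enumerator value. The DirtyState enum here is hypothetical:

    enum class DirtyState : u32 { Pipeline = 0, Viewport = 1, Scissor = 2 };

    Common::Flags<DirtyState> dirty{DirtyState::Pipeline, DirtyState::Viewport};
    dirty.clr(DirtyState::Viewport);
    const bool rebind = dirty.test(DirtyState::Pipeline);                   // true
    const bool rect = dirty.any(DirtyState::Viewport, DirtyState::Scissor); // false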
diff --git a/src/common/func_traits.h b/src/common/func_traits.h
new file mode 100644
index 00000000..407b2dbe
--- /dev/null
+++ b/src/common/func_traits.h
@@ -0,0 +1,34 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <tuple>
+
+namespace Common {
+
+template <typename Func>
+struct FuncTraits {};
+
+template <typename ReturnType_, typename... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+    using ReturnType = ReturnType_;
+
+    static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+    template <size_t I>
+    using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <typename Func>
+struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};
+
+template <typename ReturnType, typename LambdaType, typename... Args>
+struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
+    template <size_t I>
+    using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+
+    static constexpr size_t NUM_ARGS{sizeof...(Args)};
+};
+
+} // namespace Common
diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp
index b79ef820..b1181647 100644
--- a/src/core/libraries/gnmdriver/gnmdriver.cpp
+++ b/src/core/libraries/gnmdriver/gnmdriver.cpp
@@ -12,6 +12,9 @@
 #include "video_core/amdgpu/pm4_cmds.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 
+extern Frontend::WindowSDL* g_window;
+std::unique_ptr<Vulkan::RendererVulkan> renderer;
+
 namespace Libraries::GnmDriver {
 
 using namespace AmdGpu;
@@ -1912,6 +1915,7 @@ int PS4_SYSV_ABI Func_F916890425496553() {
 
 void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
     liverpool = std::make_unique<AmdGpu::Liverpool>();
+    renderer = std::make_unique<Vulkan::RendererVulkan>(*g_window, liverpool.get());
 
     LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
     LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp
index d114b9c8..a91d95c2 100644
--- a/src/core/libraries/videoout/driver.cpp
+++ b/src/core/libraries/videoout/driver.cpp
@@ -10,7 +10,7 @@
 
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 
-extern Frontend::WindowSDL* g_window;
+extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
 
 namespace Libraries::VideoOut {
 
@@ -41,8 +41,6 @@ VideoOutDriver::VideoOutDriver(u32 width, u32 height) {
     main_port.resolution.fullHeight = height;
     main_port.resolution.paneWidth = width;
     main_port.resolution.paneHeight = height;
-
-    renderer = std::make_unique<Vulkan::RendererVulkan>(*g_window);
 }
 
 VideoOutDriver::~VideoOutDriver() = default;
diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h
index 5c2bef68..e3a2667b 100644
--- a/src/core/libraries/videoout/driver.h
+++ b/src/core/libraries/videoout/driver.h
@@ -10,8 +10,7 @@
 
 namespace Vulkan {
 struct Frame;
-class RendererVulkan;
-} // namespace Vulkan
+}
 
 namespace Libraries::VideoOut {
 
@@ -84,7 +83,6 @@ private:
     std::condition_variable_any submit_cond;
     std::condition_variable done_cond;
    std::queue<Request> requests;
-    std::unique_ptr<Vulkan::RendererVulkan> renderer;
     bool is_neo{};
 };
diff --git a/src/core/platform.h b/src/core/platform.h
index 93f30f05..084f161b 100644
--- a/src/core/platform.h
+++ b/src/core/platform.h
@@ -7,11 +7,9 @@
 #include "common/logging/log.h"
 #include "common/singleton.h"
 #include "common/types.h"
-#include "magic_enum.hpp"
+#include
 #include
-#include
 #include
 #include
@@ -81,7 +79,7 @@ private:
         std::queue<IrqHandler> one_time_subscribers{};
         std::mutex m_lock{};
     };
-    std::array<IrqContext, magic_enum::enum_count<InterruptId>()> irq_contexts{};
+    std::array irq_contexts{};
 };
 
 using IrqC = Common::Singleton<IrqController>;
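FuncTraits and LambdaTraits exist so the SPIR-V backend (emit_spirv.cpp below) can recover each Emit* thunk's signature at compile time and unpack IR operands accordingly. A sketch of what they deduce, using the EmitFPAdd32 thunk declared later in this patch:

    Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);

    using Traits = Common::FuncTraits<decltype(&EmitFPAdd32)>;
    static_assert(Traits::NUM_ARGS == 4);
    static_assert(std::is_same_v<Traits::ReturnType, Id>);
    static_assert(std::is_same_v<Traits::ArgType<1>, IR::Inst*>); // the check Invoke<> performs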
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
new file mode 100644
index 00000000..1b53c74e
--- /dev/null
+++ b/src/shader_recompiler/backend/bindings.h
@@ -0,0 +1,18 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/types.h"
+
+namespace Shader::Backend {
+
+struct Bindings {
+    u32 unified{};
+    u32 uniform_buffer{};
+    u32 storage_buffer{};
+    u32 texture{};
+    u32 image{};
+};
+
+} // namespace Shader::Backend
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
new file mode 100644
index 00000000..f341d465
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -0,0 +1,285 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <boost/container/small_vector.hpp>
+#include <span>
+#include <type_traits>
+#include <utility>
+#include "common/func_traits.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/program.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+
+template <auto func, typename... Args>
+void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
+    inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
+}
+
+template <typename ArgType>
+ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
+    if constexpr (std::is_same_v<ArgType, Id>) {
+        return ctx.Def(arg);
+    } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+        return arg;
+    } else if constexpr (std::is_same_v<ArgType, u32>) {
+        return arg.U32();
+    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+        return arg.Attribute();
+    } else if constexpr (std::is_same_v<ArgType, IR::ScalarReg>) {
+        return arg.ScalarReg();
+    } else if constexpr (std::is_same_v<ArgType, IR::VectorReg>) {
+        return arg.VectorReg();
+    }
+}
+
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+    using Traits = Common::FuncTraits<decltype(func)>;
+    if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
+        if constexpr (is_first_arg_inst) {
+            SetDefinition<func>(
+                ctx, inst, inst,
+                Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+        } else {
+            SetDefinition<func>(
+                ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+        }
+    } else {
+        if constexpr (is_first_arg_inst) {
+            func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+        } else {
+            func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+        }
+    }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+    using Traits = Common::FuncTraits<decltype(func)>;
+    static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+    if constexpr (Traits::NUM_ARGS == 1) {
+        Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+    } else {
+        using FirstArgType = typename Traits::template ArgType<1>;
+        static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
+        using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+        Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+    }
+}
+
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+    switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...)                                                            \
+    case IR::Opcode::name:                                                                        \
+        return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/ir/opcodes.inc"
+#undef OPCODE
+    }
+    throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+Id TypeId(const EmitContext& ctx, IR::Type type) {
+    switch (type) {
+    case IR::Type::U1:
+        return ctx.U1[1];
+    case IR::Type::U32:
+        return ctx.U32[1];
+    default:
+        throw NotImplementedException("Phi node type {}", type);
+    }
+}
+
+void Traverse(EmitContext& ctx, IR::Program& program) {
+    IR::Block* current_block{};
+    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+        switch (node.type) {
+        case IR::AbstractSyntaxNode::Type::Block: {
+            const Id label{node.data.block->Definition<Id>()};
+            if (current_block) {
+                ctx.OpBranch(label);
+            }
+            current_block = node.data.block;
+            ctx.AddLabel(label);
+            for (IR::Inst& inst : node.data.block->Instructions()) {
+                EmitInst(ctx, &inst);
+            }
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::If: {
+            const Id if_label{node.data.if_node.body->Definition<Id>()};
+            const Id endif_label{node.data.if_node.merge->Definition<Id>()};
+            ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
+            ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Loop: {
+            const Id body_label{node.data.loop.body->Definition<Id>()};
+            const Id continue_label{node.data.loop.continue_block->Definition<Id>()};
+            const Id endloop_label{node.data.loop.merge->Definition<Id>()};
+
+            ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
+            ctx.OpBranch(body_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Break: {
+            const Id break_label{node.data.break_node.merge->Definition<Id>()};
+            const Id skip_label{node.data.break_node.skip->Definition<Id>()};
+            ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::EndIf:
+            if (current_block) {
+                ctx.OpBranch(node.data.end_if.merge->Definition<Id>());
+            }
+            break;
+        case IR::AbstractSyntaxNode::Type::Repeat: {
+            Id cond{ctx.Def(node.data.repeat.cond)};
+            const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()};
+            const Id merge_label{node.data.repeat.merge->Definition<Id>()};
+            ctx.OpBranchConditional(cond, loop_header_label, merge_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Return:
+            ctx.OpReturn();
+            break;
+        case IR::AbstractSyntaxNode::Type::Unreachable:
+            ctx.OpUnreachable();
+            break;
+        }
+        if (node.type != IR::AbstractSyntaxNode::Type::Block) {
+            current_block = nullptr;
+        }
+    }
+}
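+
+// Note on the traversal above: Traverse leans on the frontend structurizer
+// (structured_control_flow.cpp) emitting nodes in structured order, so every
+// OpSelectionMerge/OpLoopMerge lands in the block that dominates its merge
+// target. A structured If, for instance, lowers to:
+//   If(cond) -> OpSelectionMerge %endif; OpBranchConditional cond, %then, %endif
+//   EndIf    -> OpBranch %endif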
+
+Id DefineMain(EmitContext& ctx, IR::Program& program) {
+    const Id void_function{ctx.TypeFunction(ctx.void_id)};
+    const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
+    for (IR::Block* const block : program.blocks) {
+        block->SetDefinition(ctx.OpLabel());
+    }
+    Traverse(ctx, program);
+    ctx.OpFunctionEnd();
+    return main;
+}
+
+void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
+    const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
+    spv::ExecutionModel execution_model{};
+    switch (program.stage) {
+    case Stage::Compute: {
+        // const std::array workgroup_size{program.workgroup_size};
+        // execution_model = spv::ExecutionModel::GLCompute;
+        // ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
+        //                      workgroup_size[1], workgroup_size[2]);
+        break;
+    }
+    case Stage::Vertex:
+        execution_model = spv::ExecutionModel::Vertex;
+        break;
+    case Stage::Fragment:
+        execution_model = spv::ExecutionModel::Fragment;
+        if (ctx.profile.lower_left_origin_mode) {
+            ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
+        } else {
+            ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
+        }
+        // if (program.info.stores_frag_depth) {
+        //     ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
+        // }
+        break;
+    default:
+        throw NotImplementedException("Stage {}", u32(program.stage));
+    }
+    ctx.AddEntryPoint(execution_model, main, "main", interfaces);
+}
+
+void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
+    auto inst{program.blocks.front()->begin()};
+    size_t block_index{0};
+    ctx.PatchDeferredPhi([&](size_t phi_arg) {
+        if (phi_arg == 0) {
+            ++inst;
+            if (inst == program.blocks[block_index]->end() ||
+                inst->GetOpcode() != IR::Opcode::Phi) {
+                do {
+                    ++block_index;
+                    inst = program.blocks[block_index]->begin();
+                } while (inst->GetOpcode() != IR::Opcode::Phi);
+            }
+        }
+        return ctx.Def(inst->Arg(phi_arg));
+    });
+}
+} // Anonymous namespace
+
+std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& bindings) {
+    EmitContext ctx{profile, program, bindings};
+    const Id main{DefineMain(ctx, program)};
+    DefineEntryPoint(program, ctx, main);
+    if (program.stage == Stage::Vertex) {
+        ctx.AddExtension("SPV_KHR_shader_draw_parameters");
+        ctx.AddCapability(spv::Capability::DrawParameters);
+    }
+    PatchPhiNodes(program, ctx);
+    return ctx.Assemble();
+}
+
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
+    const size_t num_args{inst->NumArgs()};
+    boost::container::small_vector<Id, 32> blocks;
+    blocks.reserve(num_args);
+    for (size_t index = 0; index < num_args; ++index) {
+        blocks.push_back(inst->PhiBlock(index)->Definition<Id>());
+    }
+    // The type of a phi instruction is stored in its flags
+    const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())};
+    return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size()));
+}
+
+void EmitVoid(EmitContext&) {}
+
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
+    throw NotImplementedException("Forward identity declaration");
+}
+
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
+    throw NotImplementedException("Forward identity declaration");
+}
+
+void EmitReference(EmitContext&) {}
+
+void EmitPhiMove(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitGetZeroFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitGetSignFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitGetCarryFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitGetOverflowFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitSetVcc(EmitContext& ctx) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitGetVcc(EmitContext& ctx) {
+    throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
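Taken together, the entry points above are driven roughly as follows. This is a simplified sketch; the real wiring lives in vk_pipeline_cache.cpp further down, and producing the IR::Program is the frontend's job:

    Shader::Profile profile{};            // capability flags for the target device (profile.h)
    Shader::Backend::Bindings bindings{}; // running binding counters (bindings.h)
    IR::Program program{};                // frontend output: decode -> CFG -> structurize -> SSA
    const std::vector<u32> code = Shader::Backend::SPIRV::EmitSPIRV(profile, program, bindings);
    // 'code' is a complete SPIR-V module, ready for vkCreateShaderModule.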
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
new file mode 100644
index 00000000..f99ad86d
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -0,0 +1,21 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <vector>
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::SPIRV {
+
+[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program,
+                                         Bindings& bindings);
+
+[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
+    Bindings binding;
+    return EmitSPIRV(profile, program, binding);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
new file mode 100644
index 00000000..6ae1ce09
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -0,0 +1,57 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitBitCastU16F16(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.U32[1], value);
+}
+
+void EmitBitCastU64F64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitBitCastF16U16(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.F32[1], value);
+}
+
+void EmitBitCastF64U64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitPackUint2x32(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.U64, value);
+}
+
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.U32[2], value);
+}
+
+Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.U32[1], value);
+}
+
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.F16[2], value);
+}
+
+Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
+    return ctx.OpPackHalf2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
+    return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
new file mode 100644
index 00000000..c7868006
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -0,0 +1,153 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
+    return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
+}
+
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+    return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+    return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id
EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); +} + +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); +} + +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); +} + +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); +} + +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + +void EmitCompositeConstructF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + 
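+// The F64 construct/extract variants above and below are stubs for now; the
+// F32 helpers show the pattern they will follow once needed, e.g. replacing
+// lane 1 of a vec4 and reading it back:
+//   const Id vec{EmitCompositeConstructF32x4(ctx, x, y, z, w)};
+//   const Id mod{EmitCompositeInsertF32x4(ctx, vec, obj, 1u)};
+//   const Id lane{EmitCompositeExtractF32x4(ctx, mod, 1u)};
+// OpCompositeInsert yields a new composite; the source vector is unchanged.
+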
+void EmitCompositeExtractF64x3(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeExtractF64x4(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+    return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+    return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+    return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
new file mode 100644
index 00000000..0ba72f2a
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -0,0 +1,103 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+
+Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
+    if (IR::IsParam(attr)) {
+        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
+        const auto& info{ctx.output_params.at(index).at(element)};
+        if (info.num_components == 1) {
+            return info.id;
+        } else {
+            const u32 index_element{element - info.first_element};
+            return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element));
+        }
+    }
+    switch (attr) {
+    case IR::Attribute::Position0:
+        return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
+    case IR::Attribute::RenderTarget0:
+        return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[0], ctx.ConstU32(element));
+    default:
+        throw NotImplementedException("Write attribute {}", attr);
+    }
+}
+} // Anonymous namespace
+
+void EmitGetScalarRegister(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitSetScalarRegister(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitGetVectorRegister(EmitContext& ctx) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitSetVectorRegister(EmitContext& ctx) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitSetGotoVariable(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitGetGotoVariable(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitReadConst(EmitContext& ctx) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& binding, const IR::Value& addr,
+                       const IR::Value& offset) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& addr,
+                          const IR::Value& offset) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
+    const u32 element{static_cast<u32>(attr) % 4};
+    if (IR::IsParam(attr)) {
+        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
+        const auto& param{ctx.input_params.at(index)};
+        if (!ValidId(param.id)) {
+            // Attribute is disabled or varying component is not written
+            return ctx.ConstF32(element == 3 ?
1.0f : 0.0f); + } + const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))}; + return ctx.OpLoad(param.component_type, pointer); + } + throw NotImplementedException("Read attribute {}", attr); +} + +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { + switch (attr) { + case IR::Attribute::VertexId: + return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); + default: + throw NotImplementedException("Read U32 attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { + if (attr == IR::Attribute::Param0) { + return; + } + const Id pointer{OutputAttrPointer(ctx, attr, element)}; + ctx.OpStore(pointer, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 00000000..b033f91b --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,262 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id ExtractU16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U16, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.ConstU32(16u)); + } +} + +Id ExtractS16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.S16, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.ConstU32(16u)); + } +} + +Id ExtractU8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpUConvert(ctx.U8, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.ConstU32(8u)); + } +} + +Id ExtractS8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpSConvert(ctx.S8, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.ConstU32(8u)); + } +} +} // Anonymous namespace + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); + } else { + return ctx.OpConvertFToS(ctx.U32[1], value); + } +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, 
value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +Id EmitConvertF16F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F16[1], value); +} + +Id EmitConvertF32F16(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF32F64(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF64F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F64[1], value); +} + +Id EmitConvertF16S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF16S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF16S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF16U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF16U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF32S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF32S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF32S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32U8(EmitContext& ctx, Id value) 
{ + return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF32U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF32U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF32U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF64S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF64S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF64S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF64U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF64U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +Id EmitConvertF64U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 00000000..148eee0c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -0,0 +1,355 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { + +Id EmitFPAbs16(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F16[1], value); +} + +Id EmitFPAbs32(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F32[1], value); +} + +Id EmitFPAbs64(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F64[1], value); +} + +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return ctx.OpFAdd(ctx.F16[1], a, b); +} + +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return ctx.OpFAdd(ctx.F32[1], a, b); +} + +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return ctx.OpFAdd(ctx.F64[1], a, b); +} + +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return ctx.OpFma(ctx.F16[1], a, b, c); +} + +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return ctx.OpFma(ctx.F32[1], a, b, c); +} + +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return ctx.OpFma(ctx.F64[1], a, b, c); +} + +Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F32[1], a, b); +} + +Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F64[1], a, b); +} + +Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F32[1], a, b); +} + +Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F64[1], a, b); +} + +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return ctx.OpFMul(ctx.F16[1], a, b); +} + +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return ctx.OpFMul(ctx.F32[1], a, b); +} + +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return ctx.OpFMul(ctx.F64[1], a, b); 
+} + +Id EmitFPNeg16(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F16[1], value); +} + +Id EmitFPNeg32(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F32[1], value); +} + +Id EmitFPNeg64(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F64[1], value); +} + +Id EmitFPSin(EmitContext& ctx, Id value) { + return ctx.OpSin(ctx.F32[1], value); +} + +Id EmitFPCos(EmitContext& ctx, Id value) { + return ctx.OpCos(ctx.F32[1], value); +} + +Id EmitFPExp2(EmitContext& ctx, Id value) { + return ctx.OpExp2(ctx.F32[1], value); +} + +Id EmitFPLog2(EmitContext& ctx, Id value) { + return ctx.OpLog2(ctx.F32[1], value); +} + +Id EmitFPRecip32(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.0f), value); +} + +Id EmitFPRecip64(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value); +} + +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); +} + +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F64[1], value); +} + +Id EmitFPSqrt(EmitContext& ctx, Id value) { + return ctx.OpSqrt(ctx.F32[1], value); +} + +Id EmitFPSaturate16(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; + const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; + return ctx.OpFClamp(ctx.F16[1], value, zero, one); +} + +Id EmitFPSaturate32(EmitContext& ctx, Id value) { + const Id zero{ctx.ConstF32(f32{0.0})}; + const Id one{ctx.ConstF32(f32{1.0})}; + return ctx.OpFClamp(ctx.F32[1], value, zero, one); +} + +Id EmitFPSaturate64(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; + const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; + return ctx.OpFClamp(ctx.F64[1], value, zero, one); +} + +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return ctx.OpFClamp(ctx.F16[1], value, min_value, max_value); +} + +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return ctx.OpFClamp(ctx.F32[1], value, min_value, max_value); +} + +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return ctx.OpFClamp(ctx.F64[1], value, min_value, max_value); +} + +Id EmitFPRoundEven16(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F16[1], value); +} + +Id EmitFPRoundEven32(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F32[1], value); +} + +Id EmitFPRoundEven64(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F64[1], value); +} + +Id EmitFPFloor16(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F16[1], value); +} + +Id EmitFPFloor32(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F32[1], value); +} + +Id EmitFPFloor64(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F64[1], value); +} + +Id EmitFPCeil16(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F16[1], value); +} + +Id EmitFPCeil32(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F32[1], value); +} + +Id EmitFPCeil64(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F64[1], value); +} + +Id EmitFPTrunc16(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F16[1], value); +} + +Id EmitFPTrunc32(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F32[1], value); +} + +Id EmitFPTrunc64(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F64[1], value); +} + +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return 
ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id 
EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitFPIsNan16(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1[1], value); +} + +Id EmitFPIsNan32(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1[1], value); +} + +Id EmitFPIsNan64(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 00000000..68066916 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { + +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod, const IR::Value& offset) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips_val) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivatives, const IR::Value& offset, Id lod_clamp) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageRead(EmitContext& ctx, 
IR::Inst* inst, const IR::Value& index, Id coords) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
new file mode 100644
index 00000000..20d58e90
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -0,0 +1,335 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <sirit/sirit.h>
+#include "common/types.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+class EmitContext;
+
+// Microinstruction emitters
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
+void EmitVoid(EmitContext& ctx);
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
+void EmitReference(EmitContext&);
+void EmitPhiMove(EmitContext&);
+void EmitJoin(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitGetVcc(EmitContext& ctx);
+void EmitSetVcc(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitGetScalarRegister(EmitContext& ctx);
+void EmitSetScalarRegister(EmitContext& ctx);
+void EmitGetVectorRegister(EmitContext& ctx);
+void EmitSetVectorRegister(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetScc(EmitContext& ctx);
+Id EmitReadConst(EmitContext& ctx);
+Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
+                       const IR::Value& offset);
+Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
+                          const IR::Value& offset);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetSampleMask(EmitContext& ctx, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
+Id EmitWorkgroupId(EmitContext& ctx);
+Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitInvocationId(EmitContext& ctx);
+Id EmitInvocationInfo(EmitContext& ctx);
+Id EmitSampleId(EmitContext& ctx);
+Id EmitUndefU1(EmitContext& ctx);
+Id EmitUndefU8(EmitContext& ctx);
+Id EmitUndefU16(EmitContext& ctx);
+Id EmitUndefU32(EmitContext& ctx);
+Id EmitUndefU64(EmitContext& ctx);
+Id EmitReadSharedU8(EmitContext& ctx, Id offset);
+Id EmitReadSharedS8(EmitContext& ctx, Id offset);
+Id EmitReadSharedU16(EmitContext& ctx, Id offset);
+Id EmitReadSharedS16(EmitContext& ctx, Id offset);
+Id EmitReadSharedU32(EmitContext& ctx, Id offset);
+Id EmitReadSharedU64(EmitContext& ctx, Id offset);
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id
value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext&); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void 
EmitBitCastF64U64(EmitContext& ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, 
Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitSDiv32(EmitContext& ctx, Id a, Id b); +Id EmitUDiv32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id 
EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); + +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const 
IR::Value& offset);
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id lod, const IR::Value& offset);
+Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                   const IR::Value& offset, const IR::Value& offset2);
+Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                       const IR::Value& offset, const IR::Value& offset2, Id dref);
+Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
+                  Id lod, Id ms);
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
+                            const IR::Value& skip_mips);
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                     Id derivatives, const IR::Value& offset, Id lod_clamp);
+Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
new file mode 100644
index 00000000..74951e16
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -0,0 +1,262 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+// Both flag setters are stubs until zero/sign flag tracking is wired up in the IR.
+void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
+    // IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
+    // if (!zero) {
+    //     return;
+    // }
+    // zero->SetDefinition(ctx.OpIEqual(ctx.U1[1], result, ctx.u32_zero_value));
+    // zero->Invalidate();
+}
+
+void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
+    // IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
+    // if (!sign) {
+    //     return;
+    // }
+    // sign->SetDefinition(ctx.OpSLessThan(ctx.U1[1], result, ctx.u32_zero_value));
+    // sign->Invalidate();
+}
+} // Anonymous namespace
+
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return ctx.OpIAdd(ctx.U32[1], a, b);
+    // Id result{};
+    // if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
+    //     const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])};
+    //     const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)};
+    //     result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U);
+
+    //     const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)};
+    //     carry->SetDefinition(ctx.OpINotEqual(ctx.U1[1], carry_value, ctx.u32_zero_value));
+    //     carry->Invalidate();
+    //} else {
+    //     result = ctx.OpIAdd(ctx.U32[1], a, b);
+    //}
+    // SetZeroFlag(ctx, inst, result);
+    // SetSignFlag(ctx, inst, result);
+    // if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
+    //     // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
+    //     constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
+    //     const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1[1], a, ctx.u32_zero_value)};
+    //     const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max),
a)}; + + // const Id positive_test{ctx.OpSGreaterThan(ctx.U1[1], b, sub_a)}; + // const Id negative_test{ctx.OpSLessThan(ctx.U1[1], b, sub_a)}; + // const Id carry_flag{ctx.OpSelect(ctx.U1[1], is_positive, positive_test, negative_test)}; + // overflow->SetDefinition(carry_flag); + // overflow->Invalidate(); + //} + // return result; +} + +Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIAdd(ctx.U64, a, b); +} + +Id EmitISub32(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U32[1], a, b); +} + +Id EmitISub64(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U64, a, b); +} + +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.U32[1], a, b); +} + +Id EmitSDiv32(EmitContext& ctx, Id a, Id b) { + return ctx.OpSDiv(ctx.U32[1], a, b); +} + +Id EmitUDiv32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUDiv(ctx.U32[1], a, b); +} + +Id EmitINeg32(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U32[1], value); +} + +Id EmitINeg64(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U64, value); +} + +Id EmitIAbs32(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U32[1], value); +} + +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift); +} + +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightArithmetic(ctx.U64, base, shift); +} + +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) { + return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); +} + +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitReverse32(EmitContext& ctx, Id value) { + return ctx.OpBitReverse(ctx.U32[1], value); +} + +Id EmitBitCount32(EmitContext& ctx, Id value) { + return ctx.OpBitCount(ctx.U32[1], value); +} + +Id EmitBitwiseNot32(EmitContext& ctx, Id value) { + return ctx.OpNot(ctx.U32[1], value); +} + +Id 
EmitFindSMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindSMsb(ctx.U32[1], value); +} + +Id EmitFindUMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindUMsb(ctx.U32[1], value); +} + +Id EmitSMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpSMin(ctx.U32[1], a, b); +} + +Id EmitUMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMin(ctx.U32[1], a, b); +} + +Id EmitSMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpSMax(ctx.U32[1], a, b); +} + +Id EmitUMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMax(ctx.U32[1], a, b); +} + +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_spirv_clamp) { + value = ctx.OpBitcast(ctx.S32[1], value); + min = ctx.OpBitcast(ctx.S32[1], min); + max = ctx.OpBitcast(ctx.S32[1], max); + result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min); + result = ctx.OpBitcast(ctx.U32[1], result); + } else { + result = ctx.OpSClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min); + } else { + result = ctx.OpUClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpINotEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 00000000..a80f1d61 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { + +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalOr(ctx.U1[1], a, b); +} + +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalAnd(ctx.U1[1], a, b); +} + +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalNotEqual(ctx.U1[1], a, b); +} + +Id EmitLogicalNot(EmitContext& ctx, Id value) { + 
return ctx.OpLogicalNot(ctx.U1[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp new file mode 100644 index 00000000..003857fb --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { + +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U1[1], cond, true_value, false_value); +} + +Id EmitSelectU8(EmitContext&, Id, Id, Id) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U16, cond, true_value, false_value); +} + +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); +} + +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U64, cond, true_value, false_value); +} + +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); +} + +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); +} + +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 00000000..43f1a581 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { + +void EmitPrologue(EmitContext& ctx) {} + +void EmitEpilogue(EmitContext& ctx) {} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + throw NotImplementedException("Geometry streams"); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + throw NotImplementedException("Geometry streams"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp new file mode 100644 index 00000000..ab86cae1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { + +Id EmitUndefU1(EmitContext& ctx) { + return ctx.OpUndef(ctx.U1[1]); +} + +Id EmitUndefU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU16(EmitContext&) { + throw 
NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU32(EmitContext& ctx) { + return ctx.OpUndef(ctx.U32[1]); +} + +Id EmitUndefU64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp new file mode 100644 index 00000000..e9a55766 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -0,0 +1,136 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "shader_recompiler/backend/spirv/spirv_emit_context.h" + +namespace Shader::Backend::SPIRV { +namespace { + +std::string_view StageName(Stage stage) { + switch (stage) { + case Stage::Vertex: + return "vs"; + case Stage::TessellationControl: + return "tcs"; + case Stage::TessellationEval: + return "tes"; + case Stage::Geometry: + return "gs"; + case Stage::Fragment: + return "fs"; + case Stage::Compute: + return "cs"; + } + throw InvalidArgument("Invalid stage {}", u32(stage)); +} + +template +void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { + ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), + std::forward(args)...) + .c_str()); +} + +} // Anonymous namespace + +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) + : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { + u32& uniform_binding{bindings.unified}; + u32& storage_binding{bindings.unified}; + u32& texture_binding{bindings.unified}; + u32& image_binding{bindings.unified}; + AddCapability(spv::Capability::Shader); + DefineArithmeticTypes(); + DefineInterfaces(program); +} + +EmitContext::~EmitContext() = default; + +Id EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return value.InstRecursive()->Definition(); + } + switch (value.Type()) { + case IR::Type::Void: + return Id{}; + case IR::Type::U1: + return value.U1() ? 
+    switch (value.Type()) {
+    case IR::Type::Void:
+        return Id{};
+    case IR::Type::U1:
+        return value.U1() ? true_value : false_value;
+    case IR::Type::U32:
+        return ConstU32(value.U32());
+    case IR::Type::U64:
+        return Constant(U64, value.U64());
+    case IR::Type::F32:
+        return ConstF32(value.F32());
+    case IR::Type::F64:
+        return Constant(F64[1], value.F64());
+    default:
+        throw NotImplementedException("Immediate type {}", value.Type());
+    }
+}
+
+void EmitContext::DefineArithmeticTypes() {
+    void_id = Name(TypeVoid(), "void_id");
+    U1[1] = Name(TypeBool(), "bool_id");
+    // F16[1] = Name(TypeFloat(16), "f16_id");
+    F32[1] = Name(TypeFloat(32), "f32_id");
+    // F64[1] = Name(TypeFloat(64), "f64_id");
+    S32[1] = Name(TypeSInt(32), "i32_id");
+    U32[1] = Name(TypeUInt(32), "u32_id");
+    // U8 = Name(TypeUInt(8), "u8");
+    // S8 = Name(TypeSInt(8), "s8");
+    // U16 = Name(TypeUInt(16), "u16_id");
+    // S16 = Name(TypeSInt(16), "s16_id");
+    // U64 = Name(TypeUInt(64), "u64_id");
+
+    for (u32 i = 2; i <= 4; i++) {
+        // F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
+        F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
+        // F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
+        S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));
+        U32[i] = Name(TypeVector(U32[1], i), fmt::format("u32vec{}_id", i));
+        U1[i] = Name(TypeVector(U1[1], i), fmt::format("bvec{}_id", i));
+    }
+
+    true_value = ConstantTrue(U1[1]);
+    false_value = ConstantFalse(U1[1]);
+    u32_zero_value = ConstU32(0U);
+    f32_zero_value = ConstF32(0.0f);
+
+    output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
+    output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
+}
+
+void EmitContext::DefineInterfaces(const IR::Program& program) {
+    DefineInputs(program);
+    DefineOutputs(program);
+}
+
+void EmitContext::DefineInputs(const IR::Program& program) {
+    switch (stage) {
+    case Stage::Vertex:
+        vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
+        base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
+        break;
+    default:
+        break;
+    }
+}
+
+void EmitContext::DefineOutputs(const IR::Program& program) {
+    switch (stage) {
+    case Stage::Vertex:
+        output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
+        break;
+    case Stage::Fragment:
+        frag_color[0] = DefineOutput(F32[4], 0);
+        Name(frag_color[0], fmt::format("frag_color{}", 0));
+        interfaces.push_back(frag_color[0]);
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
new file mode 100644
index 00000000..bf78a445
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -0,0 +1,169 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <optional>
+
+#include <boost/container/small_vector.hpp>
+#include <sirit/sirit.h>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+// Indexed with 1-based component counts: T[1] is the scalar type, T[2..4] the vector types.
+struct VectorIds {
+    [[nodiscard]] Id& operator[](u32 index) {
+        return ids[index - 1];
+    }
+
+    [[nodiscard]] const Id& operator[](u32 index) const {
+        return ids[index - 1];
+    }
+
+    std::array<Id, 4> ids;
+};
+
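+// Owns the SPIR-V module being built and the common types, constants and
+// interface variables shared by the instruction emitters.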
+class EmitContext final : public Sirit::Module {
+public:
+    explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding);
+    ~EmitContext();
+
+    Id Def(const IR::Value& value);
+
+    [[nodiscard]] Id DefineInput(Id type, u32 location) {
+        const Id input_id{DefineVar(type, spv::StorageClass::Input)};
+        Decorate(input_id, spv::Decoration::Location, location);
+        return input_id;
+    }
+
+    [[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
+        const Id output_id{DefineVar(type, spv::StorageClass::Output)};
+        if (location) {
+            Decorate(output_id, spv::Decoration::Location, *location);
+        }
+        return output_id;
+    }
+
+    [[nodiscard]] Id DefineUniformConst(Id type, u32 set, u32 binding, bool readonly = false) {
+        const Id uniform_id{DefineVar(type, spv::StorageClass::UniformConstant)};
+        Decorate(uniform_id, spv::Decoration::DescriptorSet, set);
+        Decorate(uniform_id, spv::Decoration::Binding, binding);
+        if (readonly) {
+            Decorate(uniform_id, spv::Decoration::NonWritable);
+        }
+        return uniform_id;
+    }
+
+    template <bool global = true>
+    [[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
+        const Id pointer_type_id{TypePointer(storage_class, type)};
+        return global ? AddGlobalVariable(pointer_type_id, storage_class)
+                      : AddLocalVariable(pointer_type_id, storage_class);
+    }
+
+    [[nodiscard]] Id DefineVariable(Id type, std::optional<spv::BuiltIn> builtin,
+                                    spv::StorageClass storage_class) {
+        const Id id{DefineVar(type, storage_class)};
+        if (builtin) {
+            Decorate(id, spv::Decoration::BuiltIn, *builtin);
+        }
+        interfaces.push_back(id);
+        return id;
+    }
+
+    [[nodiscard]] Id ConstU32(u32 value) {
+        return Constant(U32[1], value);
+    }
+
+    template <typename... Args>
+    [[nodiscard]] Id ConstU32(Args&&... values) {
+        constexpr u32 size = static_cast<u32>(sizeof...(values));
+        static_assert(size >= 2);
+        const std::array constituents{Constant(U32[1], values)...};
+        const Id type = size <= 4 ? U32[size] : TypeArray(U32[1], ConstU32(size));
+        return ConstantComposite(type, constituents);
+    }
+
+    [[nodiscard]] Id ConstS32(s32 value) {
+        return Constant(S32[1], value);
+    }
+
+    template <typename... Args>
+    [[nodiscard]] Id ConstS32(Args&&... values) {
+        constexpr u32 size = static_cast<u32>(sizeof...(values));
+        static_assert(size >= 2);
+        const std::array constituents{Constant(S32[1], values)...};
+        const Id type = size <= 4 ? S32[size] : TypeArray(S32[1], ConstU32(size));
+        return ConstantComposite(type, constituents);
+    }
+
+    [[nodiscard]] Id ConstF32(f32 value) {
+        return Constant(F32[1], value);
+    }
+
+    template <typename... Args>
+    [[nodiscard]] Id ConstF32(Args... values) {
+        constexpr u32 size = static_cast<u32>(sizeof...(values));
+        static_assert(size >= 2);
+        const std::array constituents{Constant(F32[1], values)...};
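+        // Two to four values form a vector constant; larger packs fall back to an array.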
+        const Id type = size <= 4 ? F32[size] : TypeArray(F32[1], ConstU32(size));
+        return ConstantComposite(type, constituents);
+    }
+
+    const Profile& profile;
+    Stage stage{};
+
+    Id void_id{};
+    Id U8{};
+    Id S8{};
+    Id U16{};
+    Id S16{};
+    Id U64{};
+    VectorIds F16{};
+    VectorIds F32{};
+    VectorIds F64{};
+    VectorIds S32{};
+    VectorIds U32{};
+    VectorIds U1{};
+
+    Id true_value{};
+    Id false_value{};
+    Id u32_zero_value{};
+    Id f32_zero_value{};
+
+    Id output_u32{};
+    Id output_f32{};
+
+    boost::container::small_vector<Id, 4> interfaces;
+
+    Id output_position{};
+    Id vertex_index{};
+    Id base_vertex{};
+    std::array<Id, 8> frag_color{};
+
+    struct InputParamInfo {
+        Id id;
+        Id pointer_type;
+        Id component_type;
+    };
+    std::array<InputParamInfo, 32> input_params{};
+
+    struct ParamElementInfo {
+        Id id{};
+        u32 first_element{};
+        u32 num_components{};
+    };
+    std::array<std::array<ParamElementInfo, 4>, 32> output_params{};
+
+private:
+    void DefineArithmeticTypes();
+    void DefineInterfaces(const IR::Program& program);
+    void DefineInputs(const IR::Program& program);
+    void DefineOutputs(const IR::Program& program);
+};
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h
new file mode 100644
index 00000000..293b2fb4
--- /dev/null
+++ b/src/shader_recompiler/exception.h
@@ -0,0 +1,64 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <exception>
+#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
+
+namespace Shader {
+
+class Exception : public std::exception {
+public:
+    explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
+
+    [[nodiscard]] const char* what() const noexcept override {
+        return err_message.c_str();
+    }
+
+    void Prepend(std::string_view prepend) {
+        err_message.insert(0, prepend);
+    }
+
+    void Append(std::string_view append) {
+        err_message += append;
+    }
+
+private:
+    std::string err_message;
+};
+
+class LogicError : public Exception {
+public:
+    template <typename... Args>
+    explicit LogicError(const char* message, Args&&... args)
+        : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+class RuntimeError : public Exception {
+public:
+    template <typename... Args>
+    explicit RuntimeError(const char* message, Args&&... args)
+        : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+class NotImplementedException : public Exception {
+public:
+    template <typename... Args>
+    explicit NotImplementedException(const char* message, Args&&... args)
+        : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
+        Append(" is not implemented");
+    }
+};
+
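+// For example, throw NotImplementedException("Opcode {}", name) surfaces as
+// "Opcode <name> is not implemented".
+
+// Thrown for values outside the accepted range, e.g. an unknown shader stage.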
+class InvalidArgument : public Exception {
+public:
+    template <typename... Args>
+    explicit InvalidArgument(const char* message, Args&&... args)
+        : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp
new file mode 100644
index 00000000..475732c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/control_flow_graph.cpp
@@ -0,0 +1,209 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include "common/assert.h"
+#include "shader_recompiler/frontend/control_flow_graph.h"
+
+namespace Shader::Gcn {
+
+struct Compare {
+    bool operator()(const Block& lhs, u32 rhs) const noexcept {
+        return lhs.begin < rhs;
+    }
+
+    bool operator()(u32 lhs, const Block& rhs) const noexcept {
+        return lhs < rhs.begin;
+    }
+
+    bool operator()(const Block& lhs, const Block& rhs) const noexcept {
+        return lhs.begin < rhs.begin;
+    }
+};
+
+static IR::Condition MakeCondition(Opcode opcode) {
+    switch (opcode) {
+    case Opcode::S_CBRANCH_SCC0:
+        return IR::Condition::Scc0;
+    case Opcode::S_CBRANCH_SCC1:
+        return IR::Condition::Scc1;
+    case Opcode::S_CBRANCH_VCCZ:
+        return IR::Condition::Vccz;
+    case Opcode::S_CBRANCH_VCCNZ:
+        return IR::Condition::Vccnz;
+    case Opcode::S_CBRANCH_EXECZ:
+        return IR::Condition::Execz;
+    case Opcode::S_CBRANCH_EXECNZ:
+        return IR::Condition::Execnz;
+    default:
+        return IR::Condition::True;
+    }
+}
+
+CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
+    : block_pool{block_pool_}, inst_list{inst_list_} {
+    index_to_pc.resize(inst_list.size());
+    EmitLabels();
+    EmitBlocks();
+    LinkBlocks();
+}
+
+void CFG::EmitLabels() {
+    // Always set a label at entry point.
+    u32 pc = 0;
+    labels.push_back(pc);
+
+    const auto add_label = [this](u32 address) {
+        const auto it = std::ranges::find(labels, address);
+        if (it == labels.end()) {
+            labels.push_back(address);
+        }
+    };
+
+    // Iterate instruction list and add labels to branch targets.
+    for (u32 i = 0; i < inst_list.size(); i++) {
+        index_to_pc[i] = pc;
+        const GcnInst inst = inst_list[i];
+        if (inst.IsUnconditionalBranch()) {
+            const u32 target = inst.BranchTarget(pc);
+            add_label(target);
+        } else if (inst.IsConditionalBranch()) {
+            const u32 true_label = inst.BranchTarget(pc);
+            const u32 false_label = pc + inst.length;
+            add_label(true_label);
+            add_label(false_label);
+        } else if (inst.opcode == Opcode::S_ENDPGM) {
+            const u32 next_label = pc + inst.length;
+            add_label(next_label);
+        }
+        pc += inst.length;
+    }
+
+    // Sort labels to make sure block insertion is correct.
+    std::ranges::sort(labels);
+}
+
+void CFG::EmitBlocks() {
+    const auto get_index = [this](Label label) -> size_t {
+        if (label == 0) {
+            return 0ULL;
+        }
+        const auto it_index = std::ranges::lower_bound(index_to_pc, label);
+        ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back());
+        return std::distance(index_to_pc.begin(), std::prev(it_index));
+    };
+
+    for (auto it = labels.begin(); it != labels.end(); it++) {
+        const Label start = *it;
+        const auto next_it = std::next(it);
+        const bool is_last = next_it == labels.end();
+        if (is_last) {
+            // Last label is special.
+            return;
+        }
+        const Label end = *next_it;
+        const size_t end_index = get_index(end);
+        const auto& end_inst = inst_list[end_index];
+
+        // Insert block between the labels using the last instruction
+        // as an indicator for branching type.
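+        // 'end' is the next label, so each block covers the half-open range
+        // [begin, end) and end_inst is the final instruction inside it.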
+        Block* block = block_pool.Create();
+        block->begin = start;
+        block->end = end;
+        block->begin_index = get_index(start);
+        block->end_index = end_index;
+        block->end_inst = end_inst;
+        block->cond = MakeCondition(end_inst.opcode);
+        blocks.insert(*block);
+    }
+}
+
+void CFG::LinkBlocks() {
+    const auto get_block = [this](u32 address) {
+        const auto it = blocks.find(address, Compare{});
+        ASSERT_MSG(it != blocks.end() && it->begin == address, "Invalid block address {:#x}",
+                   address);
+        return &*it;
+    };
+
+    for (auto& block : blocks) {
+        const auto end_inst{block.end_inst};
+        // If the block doesn't end with a branch we simply
+        // need to link with the next block.
+        if (!end_inst.IsTerminateInstruction()) {
+            block.branch_true = get_block(block.end);
+            block.end_class = EndClass::Branch;
+            continue;
+        }
+
+        // Find the branch targets from the instruction and link the blocks.
+        // Note: Block end address is one instruction after end_inst.
+        const u32 branch_pc = block.end - end_inst.length;
+        const u32 target_pc = end_inst.BranchTarget(branch_pc);
+        if (end_inst.IsUnconditionalBranch()) {
+            block.branch_true = get_block(target_pc);
+            block.end_class = EndClass::Branch;
+        } else if (end_inst.IsConditionalBranch()) {
+            block.branch_true = get_block(target_pc);
+            block.branch_false = get_block(block.end);
+            block.end_class = EndClass::Branch;
+        } else {
+            // Exit blocks don't link to anything.
+            block.end_class = EndClass::Exit;
+        }
+    }
+}
+
+std::string CFG::Dot() const {
+    int node_uid{0};
+
+    const auto name_of = [](const Block& block) { return fmt::format("\"{:#x}\"", block.begin); };
+
+    std::string dot{"digraph shader {\n"};
+    dot += fmt::format("\tsubgraph cluster_{} {{\n", 0);
+    dot += fmt::format("\t\tnode [style=filled];\n");
+    for (const Block& block : blocks) {
+        const std::string name{name_of(block)};
+        const auto add_branch = [&](Block* branch, bool add_label) {
+            dot += fmt::format("\t\t{}->{}", name, name_of(*branch));
+            if (add_label && block.cond != IR::Condition::True &&
+                block.cond != IR::Condition::False) {
+                dot += fmt::format(" [label=\"{}\"]", block.cond);
+            }
+            dot += '\n';
+        };
+        dot += fmt::format("\t\t{};\n", name);
+        switch (block.end_class) {
+        case EndClass::Branch:
+            if (block.cond != IR::Condition::False) {
+                add_branch(block.branch_true, true);
+            }
+            if (block.cond != IR::Condition::True) {
+                add_branch(block.branch_false, false);
+            }
+            break;
+        case EndClass::Exit:
+            dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+            dot +=
+                fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid);
+            ++node_uid;
+            break;
+        // case EndClass::Kill:
+        //     dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+        //     dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
+        //     node_uid);
+        //     ++node_uid;
+        //     break;
+        }
+    }
+    dot += "\t\tlabel = \"main\";\n\t}\n";
+    if (blocks.empty()) {
+        dot += "Start;\n";
+    } else {
+        dot += fmt::format("\tStart -> {};\n", name_of(*blocks.begin()));
+    }
+    dot += fmt::format("\tStart [shape=diamond];\n");
+    dot += "}\n";
+    return dot;
+}
+
+} // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h
new file mode 100644
index 00000000..f3c00793
--- /dev/null
+++ b/src/shader_recompiler/frontend/control_flow_graph.h
@@ -0,0 +1,66 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <span>
+#include <string>
+#include <vector>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/intrusive/set.hpp>
+
+#include "common/types.h"
"shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/ir/condition.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Gcn { + +using Hook = + boost::intrusive::set_base_hook>; + +enum class EndClass { + Branch, ///< Block ends with a (un)conditional branch. + Exit, ///< Block ends with an exit instruction. +}; + +/// A block represents a linear range of instructions. +struct Block : Hook { + [[nodiscard]] bool Contains(u32 pc) const noexcept; + + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + + u32 begin; + u32 end; + u32 begin_index; + u32 end_index; + IR::Condition cond{}; + GcnInst end_inst{}; + EndClass end_class{}; + Block* branch_true{}; + Block* branch_false{}; +}; + +class CFG { + using Label = u32; + +public: + explicit CFG(ObjectPool& block_pool, std::span inst_list); + + [[nodiscard]] std::string Dot() const; + +private: + void EmitLabels(); + void EmitBlocks(); + void LinkBlocks(); + +public: + ObjectPool& block_pool; + std::span inst_list; + std::vector index_to_pc; + boost::container::small_vector labels; + boost::intrusive::set blocks; +}; + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp new file mode 100644 index 00000000..dcc0495d --- /dev/null +++ b/src/shader_recompiler/frontend/decode.cpp @@ -0,0 +1,1097 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "common/assert.h" +#include "shader_recompiler/frontend/decode.h" + +namespace Shader::Gcn { + +namespace bit { +template +T extract(T value, u32 lst, u32 fst) { + return (value >> fst) & ~(~T(0) << (lst - fst + 1)); +} +} // namespace bit + +InstEncoding GetInstructionEncoding(u32 token) { + auto encoding = static_cast(token & (u32)EncodingMask::MASK_9bit); + switch (encoding) { + case InstEncoding::SOP1: + case InstEncoding::SOPP: + case InstEncoding::SOPC: + return encoding; + default: + break; + } + + encoding = static_cast(token & (u32)EncodingMask::MASK_7bit); + switch (encoding) { + case InstEncoding::VOP1: + case InstEncoding::VOPC: + return encoding; + default: + break; + } + + encoding = static_cast(token & (u32)EncodingMask::MASK_6bit); + switch (encoding) { + case InstEncoding::VOP3: + case InstEncoding::EXP: + case InstEncoding::VINTRP: + case InstEncoding::DS: + case InstEncoding::MUBUF: + case InstEncoding::MTBUF: + case InstEncoding::MIMG: + return encoding; + default: + break; + } + + encoding = static_cast(token & (u32)EncodingMask::MASK_5bit); + switch (encoding) { + case InstEncoding::SMRD: + return encoding; + default: + break; + } + + encoding = static_cast(token & (u32)EncodingMask::MASK_4bit); + switch (encoding) { + case InstEncoding::SOPK: + return encoding; + default: + break; + } + + encoding = static_cast(token & (u32)EncodingMask::MASK_2bit); + switch (encoding) { + case InstEncoding::SOP2: + return encoding; + default: + break; + } + + encoding = static_cast(token & (u32)EncodingMask::MASK_1bit); + switch (encoding) { + case InstEncoding::VOP2: + return encoding; + default: + break; + } + + UNREACHABLE(); + return InstEncoding::ILLEGAL; +} + +bool HasAdditionalLiteral(InstEncoding encoding, Opcode opcode) { + switch (encoding) { + case InstEncoding::SOPK: { + return opcode == Opcode::S_SETREG_IMM32_B32; + } + case InstEncoding::VOP2: { + return opcode == Opcode::V_MADMK_F32 || opcode == Opcode::V_MADAK_F32; + } + default: + return false; + } +} + +bool 
diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp
new file mode 100644
index 00000000..dcc0495d
--- /dev/null
+++ b/src/shader_recompiler/frontend/decode.cpp
@@ -0,0 +1,1097 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include "common/assert.h"
+#include "shader_recompiler/frontend/decode.h"
+
+namespace Shader::Gcn {
+
+namespace bit {
+template <typename T>
+T extract(T value, u32 lst, u32 fst) {
+    return (value >> fst) & ~(~T(0) << (lst - fst + 1));
+}
+} // namespace bit
+
+InstEncoding GetInstructionEncoding(u32 token) {
+    auto encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_9bit);
+    switch (encoding) {
+    case InstEncoding::SOP1:
+    case InstEncoding::SOPP:
+    case InstEncoding::SOPC:
+        return encoding;
+    default:
+        break;
+    }
+
+    encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_7bit);
+    switch (encoding) {
+    case InstEncoding::VOP1:
+    case InstEncoding::VOPC:
+        return encoding;
+    default:
+        break;
+    }
+
+    encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_6bit);
+    switch (encoding) {
+    case InstEncoding::VOP3:
+    case InstEncoding::EXP:
+    case InstEncoding::VINTRP:
+    case InstEncoding::DS:
+    case InstEncoding::MUBUF:
+    case InstEncoding::MTBUF:
+    case InstEncoding::MIMG:
+        return encoding;
+    default:
+        break;
+    }
+
+    encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_5bit);
+    switch (encoding) {
+    case InstEncoding::SMRD:
+        return encoding;
+    default:
+        break;
+    }
+
+    encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_4bit);
+    switch (encoding) {
+    case InstEncoding::SOPK:
+        return encoding;
+    default:
+        break;
+    }
+
+    encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_2bit);
+    switch (encoding) {
+    case InstEncoding::SOP2:
+        return encoding;
+    default:
+        break;
+    }
+
+    encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_1bit);
+    switch (encoding) {
+    case InstEncoding::VOP2:
+        return encoding;
+    default:
+        break;
+    }
+
+    UNREACHABLE();
+    return InstEncoding::ILLEGAL;
+}
+
+bool HasAdditionalLiteral(InstEncoding encoding, Opcode opcode) {
+    switch (encoding) {
+    case InstEncoding::SOPK: {
+        return opcode == Opcode::S_SETREG_IMM32_B32;
+    }
+    case InstEncoding::VOP2: {
+        return opcode == Opcode::V_MADMK_F32 || opcode == Opcode::V_MADAK_F32;
+    }
+    default:
+        return false;
+    }
+}
+
+bool IsVop3BEncoding(Opcode opcode) {
+    return opcode == Opcode::V_ADD_I32 || opcode == Opcode::V_ADDC_U32 ||
+           opcode == Opcode::V_SUB_I32 || opcode == Opcode::V_SUBB_U32 ||
+           opcode == Opcode::V_SUBREV_I32 || opcode == Opcode::V_SUBBREV_U32 ||
+           opcode == Opcode::V_DIV_SCALE_F32 || opcode == Opcode::V_DIV_SCALE_F64 ||
+           opcode == Opcode::V_MAD_U64_U32 || opcode == Opcode::V_MAD_I64_I32;
+}
+
+GcnInst GcnDecodeContext::decodeInstruction(GcnCodeSlice& code) {
+    const uint32_t token = code.at(0);
+
+    InstEncoding encoding = GetInstructionEncoding(token);
+    ASSERT_MSG(encoding != InstEncoding::ILLEGAL, "illegal encoding");
+    uint32_t encodingLen = getEncodingLength(encoding);
+
+    // Clear the previous instruction.
+    m_instruction = GcnInst();
+
+    // Decode
+    if (encodingLen == sizeof(uint32_t)) {
+        decodeInstruction32(encoding, code);
+    } else {
+        decodeInstruction64(encoding, code);
+    }
+
+    // Update the instruction meta info.
+    updateInstructionMeta(encoding);
+
+    // Detect literal constants. Only 32-bit instructions may carry a literal constant.
+    // Note: Literal constant decoding must happen after the meta info is updated.
+    if (encodingLen == sizeof(u32)) {
+        decodeLiteralConstant(encoding, code);
+    }
+
+    repairOperandType();
+    return m_instruction;
+}
+
+uint32_t GcnDecodeContext::getEncodingLength(InstEncoding encoding) {
+    uint32_t instLength = 0;
+
+    switch (encoding) {
+    case InstEncoding::SOP1:
+    case InstEncoding::SOPP:
+    case InstEncoding::SOPC:
+    case InstEncoding::SOPK:
+    case InstEncoding::SOP2:
+    case InstEncoding::VOP1:
+    case InstEncoding::VOPC:
+    case InstEncoding::VOP2:
+    case InstEncoding::SMRD:
+    case InstEncoding::VINTRP:
+        instLength = sizeof(uint32_t);
+        break;
+
+    case InstEncoding::VOP3:
+    case InstEncoding::MUBUF:
+    case InstEncoding::MTBUF:
+    case InstEncoding::MIMG:
+    case InstEncoding::DS:
+    case InstEncoding::EXP:
+        instLength = sizeof(uint64_t);
+        break;
+    }
+    return instLength;
+}
+
+uint32_t GcnDecodeContext::getOpMapOffset(InstEncoding encoding) {
+    uint32_t offset = 0;
+    switch (encoding) {
+    case InstEncoding::SOP1:
+        offset = (uint32_t)OpcodeMap::OP_MAP_SOP1;
+        break;
+    case InstEncoding::SOPP:
+        offset = (uint32_t)OpcodeMap::OP_MAP_SOPP;
+        break;
+    case InstEncoding::SOPC:
+        offset = (uint32_t)OpcodeMap::OP_MAP_SOPC;
+        break;
+    case InstEncoding::VOP1:
+        offset = (uint32_t)OpcodeMap::OP_MAP_VOP1;
+        break;
+    case InstEncoding::VOPC:
+        offset = (uint32_t)OpcodeMap::OP_MAP_VOPC;
+        break;
+    case InstEncoding::VOP3:
+        offset = (uint32_t)OpcodeMap::OP_MAP_VOP3;
+        break;
+    case InstEncoding::EXP:
+        offset = (uint32_t)OpcodeMap::OP_MAP_EXP;
+        break;
+    case InstEncoding::VINTRP:
+        offset = (uint32_t)OpcodeMap::OP_MAP_VINTRP;
+        break;
+    case InstEncoding::DS:
+        offset = (uint32_t)OpcodeMap::OP_MAP_DS;
+        break;
+    case InstEncoding::MUBUF:
+        offset = (uint32_t)OpcodeMap::OP_MAP_MUBUF;
+        break;
+    case InstEncoding::MTBUF:
+        offset = (uint32_t)OpcodeMap::OP_MAP_MTBUF;
+        break;
+    case InstEncoding::MIMG:
+        offset = (uint32_t)OpcodeMap::OP_MAP_MIMG;
+        break;
+    case InstEncoding::SMRD:
+        offset = (uint32_t)OpcodeMap::OP_MAP_SMRD;
+        break;
+    case InstEncoding::SOPK:
+        offset = (uint32_t)OpcodeMap::OP_MAP_SOPK;
+        break;
+    case InstEncoding::SOP2:
+        offset = (uint32_t)OpcodeMap::OP_MAP_SOP2;
+        break;
+    case InstEncoding::VOP2:
+        offset = (uint32_t)OpcodeMap::OP_MAP_VOP2;
+        break;
+    }
+    return offset;
+}
+
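GetInstructionEncoding probes the widest encoding masks first, and the order matters. The mask and pattern constants live in opcodes.h and are not shown in this patch, so the check below uses values from the public GCN ISA encoding tables as stand-ins: an SOP1 token also satisfies the shorter 4-bit SOPK test, which is why the 9-bit group has to be tried before it.

#include <cassert>
#include <cstdint>

int main() {
    // Illustrative values per the public GCN ISA docs.
    constexpr uint32_t sop1_mask = 0xFF800000, sop1_bits = 0xBE800000; // [31:23] = 0b101111101
    constexpr uint32_t sopk_mask = 0xF0000000, sopk_bits = 0xB0000000; // [31:28] = 0b1011
    const uint32_t token = 0xBE800080; // an SOP1-encoded word
    assert((token & sop1_mask) == sop1_bits); // correct match: SOP1
    assert((token & sopk_mask) == sopk_bits); // also passes the shorter SOPK test!
}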
+uint32_t GcnDecodeContext::mapEncodingOp(InstEncoding encoding, Opcode opcode) {
+    // Map from the uniform opcode space back to the encoding-specific opcode.
+    uint32_t encodingOp = 0;
+    if (encoding == InstEncoding::VOP3) {
+        if (opcode >= Opcode::V_CMP_F_F32 && opcode <= Opcode::V_CMPX_T_U64) {
+            uint32_t op =
+                static_cast<uint32_t>(opcode) - static_cast<uint32_t>(OpcodeMap::OP_MAP_VOPC);
+            encodingOp = op + static_cast<uint32_t>(OpMapVOP3VOPX::VOP3_TO_VOPC);
+        } else if (opcode >= Opcode::V_CNDMASK_B32 && opcode <= Opcode::V_CVT_PK_I16_I32) {
+            uint32_t op =
+                static_cast<uint32_t>(opcode) - static_cast<uint32_t>(OpcodeMap::OP_MAP_VOP2);
+            encodingOp = op + static_cast<uint32_t>(OpMapVOP3VOPX::VOP3_TO_VOP2);
+        } else if (opcode >= Opcode::V_NOP && opcode <= Opcode::V_MOVRELSD_B32) {
+            uint32_t op =
+                static_cast<uint32_t>(opcode) - static_cast<uint32_t>(OpcodeMap::OP_MAP_VOP1);
+            encodingOp = op + static_cast<uint32_t>(OpMapVOP3VOPX::VOP3_TO_VOP1);
+        } else {
+            encodingOp =
+                static_cast<uint32_t>(opcode) - static_cast<uint32_t>(OpcodeMap::OP_MAP_VOP3);
+        }
+    } else {
+        uint32_t mapOffset = getOpMapOffset(encoding);
+        encodingOp = static_cast<uint32_t>(opcode) - mapOffset;
+    }
+
+    return encodingOp;
+}
+
+void GcnDecodeContext::updateInstructionMeta(InstEncoding encoding) {
+    uint32_t encodingOp = mapEncodingOp(encoding, m_instruction.opcode);
+    InstFormat instFormat = InstructionFormat(encoding, encodingOp);
+
+    ASSERT_MSG(instFormat.src_type != ScalarType::Undefined &&
+                   instFormat.dst_type != ScalarType::Undefined,
+               "TODO: Instruction format table is incomplete, please fix it manually.");
+
+    m_instruction.inst_class = instFormat.inst_class;
+    m_instruction.category = instFormat.inst_category;
+    m_instruction.encoding = encoding;
+    m_instruction.src_count = instFormat.src_count;
+    m_instruction.length = getEncodingLength(encoding);
+
+    // Update the scalar type of src operands.
+    auto setOperandType = [&instFormat](InstOperand& src) {
+        // Only update uninitialized numeric types.
+        if (src.type == ScalarType::Undefined) {
+            src.type = instFormat.src_type;
+        }
+    };
+
+    std::for_each_n(m_instruction.src.begin(), m_instruction.src_count, setOperandType);
+
+    // Update the scalar type of dst operands.
+    switch (m_instruction.dst_count) {
+    case 2: {
+        if (m_instruction.dst[1].type == ScalarType::Undefined) {
+            // Only VOP3B has an additional sdst operand,
+            // and it must be Uint64.
+            m_instruction.dst[1].type = ScalarType::Uint64;
+        }
+    }
+        [[fallthrough]];
+    case 1: {
+        if (m_instruction.dst[0].type == ScalarType::Undefined) {
+            m_instruction.dst[0].type = instFormat.dst_type;
+        }
+    }
+    }
+}
+
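The arithmetic above works because the uniform Opcode enum is laid out as contiguous per-encoding ranges, so mapping is just subtracting the range base. A freestanding toy of the round trip, with hypothetical base values rather than the real OpcodeMap constants:

#include <cassert>
#include <cstdint>

enum class MiniOpcodeMap : uint32_t { OP_MAP_SOP2 = 0, OP_MAP_SOPK = 64 };
enum class MiniOpcode : uint32_t { S_ADD_U32 = 0, S_MOVK_I32 = 64 };

int main() {
    // Encoding-local op 0 of SOPK maps to uniform opcode 64 and back.
    const uint32_t local = 0;
    const auto uniform =
        static_cast<MiniOpcode>(local + static_cast<uint32_t>(MiniOpcodeMap::OP_MAP_SOPK));
    assert(uniform == MiniOpcode::S_MOVK_I32);
    assert(static_cast<uint32_t>(uniform) - static_cast<uint32_t>(MiniOpcodeMap::OP_MAP_SOPK) ==
           local);
}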
+void GcnDecodeContext::repairOperandType() {
+    // Some instructions have operand types that are not uniform across operands.
+    // Ideally the instruction format table would encode this per operand, but
+    // that is a large change, so the affected operands are patched up here.
+    switch (m_instruction.opcode) {
+    case Opcode::V_MAD_U64_U32:
+        m_instruction.src[2].type = ScalarType::Uint64;
+        break;
+    case Opcode::V_MAD_I64_I32:
+        m_instruction.src[2].type = ScalarType::Sint64;
+        break;
+    case Opcode::V_ADDC_U32:
+        m_instruction.src[2].type = ScalarType::Uint64;
+        break;
+    case Opcode::IMAGE_GATHER4_C:
+        m_instruction.src[0].type = ScalarType::Any;
+        break;
+    default:
+        break;
+    }
+}
+
+OperandField GcnDecodeContext::getOperandField(uint32_t code) {
+    OperandField field = {};
+    if (code >= ScalarGPRMin && code <= ScalarGPRMax) {
+        field = OperandField::ScalarGPR;
+    } else if (code >= SignedConstIntPosMin && code <= SignedConstIntPosMax) {
+        field = OperandField::SignedConstIntPos;
+    } else if (code >= SignedConstIntNegMin && code <= SignedConstIntNegMax) {
+        field = OperandField::SignedConstIntNeg;
+    } else if (code >= VectorGPRMin && code <= VectorGPRMax) {
+        field = OperandField::VectorGPR;
+    } else {
+        field = static_cast<OperandField>(code);
+    }
+    return field;
+}
+
+void GcnDecodeContext::decodeInstruction32(InstEncoding encoding, GcnCodeSlice& code) {
+    u32 hexInstruction = code.readu32();
+    switch (encoding) {
+    case InstEncoding::SOP1:
+        decodeInstructionSOP1(hexInstruction);
+        break;
+    case InstEncoding::SOPP:
+        decodeInstructionSOPP(hexInstruction);
+        break;
+    case InstEncoding::SOPC:
+        decodeInstructionSOPC(hexInstruction);
+        break;
+    case InstEncoding::SOPK:
+        decodeInstructionSOPK(hexInstruction);
+        break;
+    case InstEncoding::SOP2:
+        decodeInstructionSOP2(hexInstruction);
+        break;
+    case InstEncoding::VOP1:
+        decodeInstructionVOP1(hexInstruction);
+        break;
+    case InstEncoding::VOPC:
+        decodeInstructionVOPC(hexInstruction);
+        break;
+    case InstEncoding::VOP2:
+        decodeInstructionVOP2(hexInstruction);
+        break;
+    case InstEncoding::SMRD:
+        decodeInstructionSMRD(hexInstruction);
+        break;
+    case InstEncoding::VINTRP:
+        decodeInstructionVINTRP(hexInstruction);
+        break;
+    }
+}
+
+void GcnDecodeContext::decodeInstruction64(InstEncoding encoding, GcnCodeSlice& code) {
+    uint64_t hexInstruction = code.readu64();
+    switch (encoding) {
+    case InstEncoding::VOP3:
+        decodeInstructionVOP3(hexInstruction);
+        break;
+    case InstEncoding::MUBUF:
+        decodeInstructionMUBUF(hexInstruction);
+        break;
+    case InstEncoding::MTBUF:
+        decodeInstructionMTBUF(hexInstruction);
+        break;
+    case InstEncoding::MIMG:
+        decodeInstructionMIMG(hexInstruction);
+        break;
+    case InstEncoding::DS:
+        decodeInstructionDS(hexInstruction);
+        break;
+    case InstEncoding::EXP:
+        decodeInstructionEXP(hexInstruction);
+        break;
+    }
+}
+
+void GcnDecodeContext::decodeLiteralConstant(InstEncoding encoding, GcnCodeSlice& code) {
+    if (HasAdditionalLiteral(encoding, m_instruction.opcode)) {
+        u32 encoding_op = mapEncodingOp(encoding, m_instruction.opcode);
+        InstFormat instFormat = InstructionFormat(encoding, encoding_op);
+        m_instruction.src[m_instruction.src_count].field = OperandField::LiteralConst;
+        m_instruction.src[m_instruction.src_count].type = instFormat.src_type;
+        m_instruction.src[m_instruction.src_count].code = code.readu32();
+        ++m_instruction.src_count;
+        m_instruction.length += sizeof(u32);
+        return;
+    }
+
+    // Check if the instruction references a literal constant operand.
+    const auto it = std::ranges::find_if(m_instruction.src, [](InstOperand& src) {
+        return src.field == OperandField::LiteralConst;
+    });
+    if (it != m_instruction.src.end()) {
+        it->code = code.readu32();
+        m_instruction.length += sizeof(u32);
+    }
+}
+
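As a concrete example, s_mov_b32 s0, 0x12345678 encodes as two dwords: the SOP1 word with ssrc0 set to the literal-constant selector, followed by the literal itself (0xBE8003FF 0x12345678 under the standard GCN encoding; treat the exact bytes as illustrative). decodeLiteralConstant spots the LiteralConst operand, reads the trailing dword into src[0].code, and grows the instruction length from 4 to 8 bytes so that the decode loop advances past the literal.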
+void GcnDecodeContext::decodeInstructionSOP1(u32 hexInstruction) {
+    u32 ssrc0 = bit::extract(hexInstruction, 7, 0);
+    u32 op = bit::extract(hexInstruction, 15, 8);
+    u32 sdst = bit::extract(hexInstruction, 22, 16);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_SOP1));
+
+    m_instruction.src[0].field = getOperandField(ssrc0);
+    m_instruction.src[0].code = ssrc0;
+    m_instruction.dst[0].field = getOperandField(sdst);
+    m_instruction.dst[0].code = sdst;
+    m_instruction.dst_count = 1;
+}
+
+void GcnDecodeContext::decodeInstructionSOPP(u32 hexInstruction) {
+    u32 op = bit::extract(hexInstruction, 22, 16);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_SOPP));
+
+    m_instruction.control.sopp = *reinterpret_cast<InstControlSOPP*>(&hexInstruction);
+}
+
+void GcnDecodeContext::decodeInstructionSOPC(u32 hexInstruction) {
+    u32 ssrc0 = bit::extract(hexInstruction, 7, 0);
+    u32 ssrc1 = bit::extract(hexInstruction, 15, 8);
+    u32 op = bit::extract(hexInstruction, 22, 16);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_SOPC));
+
+    m_instruction.src[0].field = getOperandField(ssrc0);
+    m_instruction.src[0].code = ssrc0;
+    m_instruction.src[1].field = getOperandField(ssrc1);
+    m_instruction.src[1].code = ssrc1;
+}
+
+void GcnDecodeContext::decodeInstructionSOPK(u32 hexInstruction) {
+    u32 sdst = bit::extract(hexInstruction, 22, 16);
+    u32 op = bit::extract(hexInstruction, 27, 23);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_SOPK));
+
+    m_instruction.dst[0].field = getOperandField(sdst);
+    m_instruction.dst[0].code = sdst;
+    m_instruction.dst_count = 1;
+
+    m_instruction.control.sopk = *reinterpret_cast<InstControlSOPK*>(&hexInstruction);
+}
+
+void GcnDecodeContext::decodeInstructionSOP2(u32 hexInstruction) {
+    u32 ssrc0 = bit::extract(hexInstruction, 7, 0);
+    u32 ssrc1 = bit::extract(hexInstruction, 15, 8);
+    u32 sdst = bit::extract(hexInstruction, 22, 16);
+    u32 op = bit::extract(hexInstruction, 29, 23);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_SOP2));
+
+    m_instruction.src[0].field = getOperandField(ssrc0);
+    m_instruction.src[0].code = ssrc0;
+    m_instruction.src[1].field = getOperandField(ssrc1);
+    m_instruction.src[1].code = ssrc1;
+    m_instruction.dst[0].field = getOperandField(sdst);
+    m_instruction.dst[0].code = sdst;
+    m_instruction.dst_count = 1;
+}
+
+void GcnDecodeContext::decodeInstructionVOP1(u32 hexInstruction) {
+    u32 src0 = bit::extract(hexInstruction, 8, 0);
+    u32 op = bit::extract(hexInstruction, 16, 9);
+    u32 vdst = bit::extract(hexInstruction, 24, 17);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_VOP1));
+
+    m_instruction.src[0].field = getOperandField(src0);
+    m_instruction.src[0].code =
+        m_instruction.src[0].field == OperandField::VectorGPR ? src0 - VectorGPRMin : src0;
+    m_instruction.dst[0].field = OperandField::VectorGPR;
+    m_instruction.dst[0].code = vdst;
+    m_instruction.dst_count = 1;
+
+    OpcodeVOP1 vop1Op = static_cast<OpcodeVOP1>(op);
+    if (vop1Op == OpcodeVOP1::V_READFIRSTLANE_B32) {
+        m_instruction.dst[1].field = getOperandField(vdst);
+        m_instruction.dst[1].type = ScalarType::Uint32;
+        m_instruction.dst[1].code = vdst;
+    }
+}
+
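The field extraction is easy to check by hand. Assuming the standard GCN SOP2 layout used above (op in [29:23], sdst in [22:16], ssrc1 in [15:8], ssrc0 in [7:0]), the word for s_add_u32 s0, s1, s2 decodes like this in a freestanding re-implementation of bit::extract:

#include <cassert>
#include <cstdint>

static uint32_t extract(uint32_t v, uint32_t lst, uint32_t fst) {
    return (v >> fst) & ~(~0u << (lst - fst + 1));
}

int main() {
    const uint32_t word = 0x80000201; // bits [31:30] = 0b10 select the SOP2 group
    assert(extract(word, 7, 0) == 1);   // ssrc0 = s1
    assert(extract(word, 15, 8) == 2);  // ssrc1 = s2
    assert(extract(word, 22, 16) == 0); // sdst  = s0
    assert(extract(word, 29, 23) == 0); // op    = S_ADD_U32
}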
+void GcnDecodeContext::decodeInstructionVOPC(u32 hexInstruction) {
+    u32 src0 = bit::extract(hexInstruction, 8, 0);
+    u32 vsrc1 = bit::extract(hexInstruction, 16, 9);
+    u32 op = bit::extract(hexInstruction, 24, 17);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_VOPC));
+
+    m_instruction.src[0].field = getOperandField(src0);
+    m_instruction.src[0].code =
+        m_instruction.src[0].field == OperandField::VectorGPR ? src0 - VectorGPRMin : src0;
+    m_instruction.src[1].field = OperandField::VectorGPR;
+    m_instruction.src[1].code = vsrc1;
+    // The VOPC dst is forced to VCC.
+    // To stay uniform with the VOP3 encoding, store it in dst[1].
+    m_instruction.dst[1].field = OperandField::VccLo;
+    m_instruction.dst[1].type = ScalarType::Uint64;
+    m_instruction.dst[1].code = static_cast<u32>(OperandField::VccLo);
+}
+
+void GcnDecodeContext::decodeInstructionVOP2(u32 hexInstruction) {
+    u32 src0 = bit::extract(hexInstruction, 8, 0);
+    u32 vsrc1 = bit::extract(hexInstruction, 16, 9);
+    u32 vdst = bit::extract(hexInstruction, 24, 17);
+    u32 op = bit::extract(hexInstruction, 30, 25);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_VOP2));
+
+    m_instruction.src[0].field = getOperandField(src0);
+    m_instruction.src[0].code =
+        m_instruction.src[0].field == OperandField::VectorGPR ? src0 - VectorGPRMin : src0;
+    m_instruction.src[1].field = OperandField::VectorGPR;
+    m_instruction.src[1].code = vsrc1;
+    m_instruction.dst[0].field = OperandField::VectorGPR;
+    m_instruction.dst[0].code = vdst;
+    m_instruction.dst_count = 1;
+
+    OpcodeVOP2 vop2Op = static_cast<OpcodeVOP2>(op);
+    if (vop2Op == OpcodeVOP2::V_READLANE_B32 || vop2Op == OpcodeVOP2::V_WRITELANE_B32) {
+        // vsrc1 is scalar for lane instructions.
+        m_instruction.src[1].field = getOperandField(vsrc1);
+        // dst is an SGPR.
+        m_instruction.dst[1].field = OperandField::ScalarGPR;
+        m_instruction.dst[1].type = ScalarType::Uint32;
+        m_instruction.dst[1].code = vdst;
+    } else if (IsVop3BEncoding(m_instruction.opcode)) {
+        m_instruction.dst[1].field = OperandField::VccLo;
+        m_instruction.dst[1].type = ScalarType::Uint64;
+        m_instruction.dst[1].code = static_cast<u32>(OperandField::VccLo);
+    }
+}
+
+void GcnDecodeContext::decodeInstructionSMRD(u32 hexInstruction) {
+    u32 sbase = bit::extract(hexInstruction, 14, 9);
+    u32 sdst = bit::extract(hexInstruction, 21, 15);
+    u32 op = bit::extract(hexInstruction, 26, 22);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_SMRD));
+
+    m_instruction.src[0].field = OperandField::ScalarGPR;
+    m_instruction.src[0].code = sbase;
+    m_instruction.dst[0].field = OperandField::ScalarGPR;
+    m_instruction.dst[0].code = sdst;
+    m_instruction.dst_count = 1;
+
+    m_instruction.control.smrd = *reinterpret_cast<InstControlSMRD*>(&hexInstruction);
+
+    if (op <= static_cast<u32>(OpcodeSMRD::S_LOAD_DWORDX16)) {
+        m_instruction.control.smrd.count = 1 << op;
+    } else if (op >= static_cast<u32>(OpcodeSMRD::S_BUFFER_LOAD_DWORD) &&
+               op <= static_cast<u32>(OpcodeSMRD::S_BUFFER_LOAD_DWORDX16)) {
+        m_instruction.control.smrd.count = 1 << (op - 8);
+    }
+
+    if (m_instruction.control.smrd.imm == 0) {
+        u32 code = m_instruction.control.smrd.offset;
+        m_instruction.src[1].field = getOperandField(code);
+        m_instruction.src[1].type = ScalarType::Uint32;
+        m_instruction.src[1].code = code;
+    }
+}
+
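The SMRD dword count falls out of the opcode numbering: S_LOAD_DWORD through S_LOAD_DWORDX16 are ops 0 to 4 and load 1 << op dwords, and the buffer variants repeat the pattern starting at op 8, hence the op - 8 rebase. A quick freestanding check of that arithmetic:

#include <cassert>

int main() {
    // op 2 = S_LOAD_DWORDX4, op 4 = S_LOAD_DWORDX16 (numbering per the decoder above).
    assert((1 << 2) == 4);
    assert((1 << 4) == 16);
    // op 10 = S_BUFFER_LOAD_DWORDX4, rebased by 8, yields the same count.
    assert((1 << (10 - 8)) == 4);
}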
+void GcnDecodeContext::decodeInstructionVINTRP(u32 hexInstruction) {
+    u32 vsrc = bit::extract(hexInstruction, 7, 0);
+    u32 op = bit::extract(hexInstruction, 17, 16);
+    u32 vdst = bit::extract(hexInstruction, 25, 18);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_VINTRP));
+
+    m_instruction.src[0].field = OperandField::VectorGPR;
+    m_instruction.src[0].code = vsrc;
+    m_instruction.dst[0].field = OperandField::VectorGPR;
+    m_instruction.dst[0].code = vdst;
+    m_instruction.dst_count = 1;
+
+    m_instruction.control.vintrp = *reinterpret_cast<InstControlVINTRP*>(&hexInstruction);
+}
+
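VINTRP covers the two-step barycentric interpolation sequence. Per the public GCN ISA documents, V_INTERP_P1_F32 computes dst = p10 * i + p0 and V_INTERP_P2_F32 accumulates dst = p20 * j + dst; a scalar model of one lane, with invented names:

// attr(i, j) = p0 + i * p10 + j * p20, evaluated in two steps per lane.
float InterpolateAttribute(float p0, float p10, float p20, float i, float j) {
    float acc = p10 * i + p0; // V_INTERP_P1_F32
    return p20 * j + acc;     // V_INTERP_P2_F32
}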
+void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
+    u32 vdst = bit::extract(hexInstruction, 7, 0);
+    u32 sdst = bit::extract(hexInstruction, 14, 8); // For VOP3B
+    u32 op = bit::extract(hexInstruction, 25, 17);
+    u32 src0 = bit::extract(hexInstruction, 40, 32);
+    u32 src1 = bit::extract(hexInstruction, 49, 41);
+    u32 src2 = bit::extract(hexInstruction, 58, 50);
+
+    if (op >= static_cast<u32>(OpcodeVOP3::V_CMP_F_F32) &&
+        op <= static_cast<u32>(OpcodeVOP3::V_CMPX_T_U64)) {
+        // Map from VOP3 to VOPC
+        u32 vopcOp = op - static_cast<u32>(OpMapVOP3VOPX::VOP3_TO_VOPC);
+        m_instruction.opcode =
+            static_cast<Opcode>(vopcOp + static_cast<u32>(OpcodeMap::OP_MAP_VOPC));
+    } else if (op >= static_cast<u32>(OpcodeVOP3::V_CNDMASK_B32) &&
+               op <= static_cast<u32>(OpcodeVOP3::V_CVT_PK_I16_I32)) {
+        // Map from VOP3 to VOP2
+        u32 vop2Op = op - static_cast<u32>(OpMapVOP3VOPX::VOP3_TO_VOP2);
+        m_instruction.opcode =
+            static_cast<Opcode>(vop2Op + static_cast<u32>(OpcodeMap::OP_MAP_VOP2));
+    } else if (op >= static_cast<u32>(OpcodeVOP3::V_NOP) &&
+               op <= static_cast<u32>(OpcodeVOP3::V_MOVRELSD_B32)) {
+        // Map from VOP3 to VOP1
+        u32 vop1Op = op - static_cast<u32>(OpMapVOP3VOPX::VOP3_TO_VOP1);
+        m_instruction.opcode =
+            static_cast<Opcode>(vop1Op + static_cast<u32>(OpcodeMap::OP_MAP_VOP1));
+    } else {
+        // Plain VOP3 encoding, no mapping needed.
+        m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_VOP3));
+    }
+
+    m_instruction.src[0].field = getOperandField(src0);
+    m_instruction.src[0].code =
+        m_instruction.src[0].field == OperandField::VectorGPR ? src0 - VectorGPRMin : src0;
+    m_instruction.src[1].field = getOperandField(src1);
+    m_instruction.src[1].code =
+        m_instruction.src[1].field == OperandField::VectorGPR ? src1 - VectorGPRMin : src1;
+    m_instruction.src[2].field = getOperandField(src2);
+    m_instruction.src[2].code =
+        m_instruction.src[2].field == OperandField::VectorGPR ? src2 - VectorGPRMin : src2;
+    m_instruction.dst[0].field = OperandField::VectorGPR;
+    m_instruction.dst[0].code = vdst;
+
+    OpcodeVOP3 vop3Op = static_cast<OpcodeVOP3>(op);
+    if (IsVop3BEncoding(m_instruction.opcode)) {
+        m_instruction.dst[1].field = OperandField::ScalarGPR;
+        m_instruction.dst[1].type = ScalarType::Uint64;
+        m_instruction.dst[1].code = sdst;
+    } else {
+        if (vop3Op >= OpcodeVOP3::V_CMP_F_F32 && vop3Op <= OpcodeVOP3::V_CMPX_T_U64) {
+            m_instruction.dst[1].field = getOperandField(vdst);
+            m_instruction.dst[1].type = ScalarType::Uint64;
+            m_instruction.dst[1].code = vdst;
+        } else if (vop3Op >= OpcodeVOP3::V_READLANE_B32 && vop3Op <= OpcodeVOP3::V_WRITELANE_B32) {
+            // vsrc1 is scalar for lane instructions.
+            m_instruction.src[1].field = getOperandField(src1);
+            // dst is an SGPR for lane instructions.
+            m_instruction.dst[1].field = OperandField::ScalarGPR;
+            m_instruction.dst[1].type = ScalarType::Uint32;
+            m_instruction.dst[1].code = vdst;
+        }
+    }
+
+    if (op >= static_cast<u32>(OpcodeVOP3::V_ADD_I32) &&
+        op <= static_cast<u32>(OpcodeVOP3::V_DIV_SCALE_F64)) {
+        // VOP3B has an additional sdst operand.
+        m_instruction.dst_count = 2;
+    } else {
+        m_instruction.dst_count = 1;
+    }
+
+    m_instruction.control.vop3 = *reinterpret_cast<InstControlVOP3*>(&hexInstruction);
+
+    // Update input modifiers.
+    auto& control = m_instruction.control.vop3;
+    for (u32 i = 0; i != 3; ++i) {
+        if (control.abs & (1u << i)) {
+            m_instruction.src[i].input_modifier.abs = true;
+        }
+
+        if (control.neg & (1u << i)) {
+            m_instruction.src[i].input_modifier.neg = true;
+        }
+    }
+
+    // Update the output modifier.
+    auto& outputMod = m_instruction.dst[0].output_modifier;
+
+    outputMod.clamp = static_cast<bool>(control.clmp);
+    switch (control.omod) {
+    case 0:
+        outputMod.multiplier = std::numeric_limits<float>::quiet_NaN();
+        break;
+    case 1:
+        outputMod.multiplier = 2.0f;
+        break;
+    case 2:
+        outputMod.multiplier = 4.0f;
+        break;
+    case 3:
+        outputMod.multiplier = 0.5f;
+        break;
+    }
+}
+
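The NaN stored for omod == 0 is a sentinel meaning "no output multiplier". A scalar model of how a consumer of these fields might apply the modifiers; the scale-then-clamp ordering here is illustrative rather than taken from the ISA:

#include <algorithm>
#include <cmath>

float ApplyOutputModifiers(float value, float multiplier, bool clamp) {
    if (!std::isnan(multiplier)) { // NaN sentinel: leave the value unscaled
        value *= multiplier;       // omod 1/2/3 -> x2, x4, x0.5
    }
    return clamp ? std::clamp(value, 0.0f, 1.0f) : value;
}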
+void GcnDecodeContext::decodeInstructionMUBUF(uint64_t hexInstruction) {
+    u32 op = bit::extract(hexInstruction, 24, 18);
+    u32 vaddr = bit::extract(hexInstruction, 39, 32);
+    u32 vdata = bit::extract(hexInstruction, 47, 40);
+    u32 srsrc = bit::extract(hexInstruction, 52, 48);
+    u32 soffset = bit::extract(hexInstruction, 63, 56);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_MUBUF));
+
+    m_instruction.src[0].field = OperandField::VectorGPR;
+    m_instruction.src[0].code = vaddr;
+    m_instruction.src[1].field = OperandField::VectorGPR;
+    m_instruction.src[1].code = vdata;
+    m_instruction.src[2].field = OperandField::ScalarGPR;
+    m_instruction.src[2].code = srsrc;
+    m_instruction.src[3].field = getOperandField(soffset);
+    m_instruction.src[3].code = soffset;
+
+    m_instruction.control.mubuf = *reinterpret_cast<InstControlMUBUF*>(&hexInstruction);
+
+    if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_FORMAT_X) &&
+        op <= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_FORMAT_XYZW)) {
+        m_instruction.control.mubuf.count = op + 1;
+        m_instruction.control.mubuf.size = (op + 1) * sizeof(u32);
+    } else if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_FORMAT_X) &&
+               op <= static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_FORMAT_XYZW)) {
+        m_instruction.control.mubuf.count = op - 3;
+        m_instruction.control.mubuf.size = (op - 3) * sizeof(u32);
+    } else if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_DWORD) &&
+               op <= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_DWORDX3)) {
+        m_instruction.control.mubuf.count =
+            op == static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_DWORDX3) ? 3 : 1 << (op - 12);
+        m_instruction.control.mubuf.size = m_instruction.control.mubuf.count * sizeof(u32);
+    } else if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_DWORD) &&
+               op <= static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_DWORDX3)) {
+        m_instruction.control.mubuf.count =
+            op == static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_DWORDX3) ? 3 : 1 << (op - 28);
+        m_instruction.control.mubuf.size = m_instruction.control.mubuf.count * sizeof(u32);
+    } else if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_UBYTE) &&
+               op <= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_SSHORT)) {
+        m_instruction.control.mubuf.count = 1;
+        if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_UBYTE) &&
+            op <= static_cast<u32>(OpcodeMUBUF::BUFFER_LOAD_SBYTE)) {
+            m_instruction.control.mubuf.size = 1;
+        } else {
+            m_instruction.control.mubuf.size = 2;
+        }
+    } else if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_BYTE) &&
+               op <= static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_SHORT)) {
+        m_instruction.control.mubuf.count = 1;
+        if (op == static_cast<u32>(OpcodeMUBUF::BUFFER_STORE_BYTE)) {
+            m_instruction.control.mubuf.size = 1;
+        } else {
+            m_instruction.control.mubuf.size = 2;
+        }
+    } else if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_ATOMIC_SWAP) &&
+               op <= static_cast<u32>(OpcodeMUBUF::BUFFER_ATOMIC_FMAX)) {
+        m_instruction.control.mubuf.count = 1;
+        m_instruction.control.mubuf.size = sizeof(u32);
+    } else if (op >= static_cast<u32>(OpcodeMUBUF::BUFFER_ATOMIC_SWAP_X2) &&
+               op <= static_cast<u32>(OpcodeMUBUF::BUFFER_ATOMIC_FMAX_X2)) {
+        m_instruction.control.mubuf.count = 2;
+        m_instruction.control.mubuf.size = sizeof(u32) * 2;
+    }
+}
+
+void GcnDecodeContext::decodeInstructionMTBUF(uint64_t hexInstruction) {
+    u32 op = bit::extract(hexInstruction, 18, 16);
+    u32 vaddr = bit::extract(hexInstruction, 39, 32);
+    u32 vdata = bit::extract(hexInstruction, 47, 40);
+    u32 srsrc = bit::extract(hexInstruction, 52, 48);
+    u32 soffset = bit::extract(hexInstruction, 63, 56);
+
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_MTBUF));
+
+    m_instruction.src[0].field = OperandField::VectorGPR;
+    m_instruction.src[0].code = vaddr;
+    m_instruction.src[1].field = OperandField::VectorGPR;
+    m_instruction.src[1].code = vdata;
+    m_instruction.src[2].field = OperandField::ScalarGPR;
+    m_instruction.src[2].code = srsrc;
+    m_instruction.src[3].field = getOperandField(soffset);
+    m_instruction.src[3].code = soffset;
+
+    m_instruction.control.mtbuf = *reinterpret_cast<InstControlMTBUF*>(&hexInstruction);
+
+    if (op >= static_cast<u32>(OpcodeMTBUF::TBUFFER_LOAD_FORMAT_X) &&
+        op <= static_cast<u32>(OpcodeMTBUF::TBUFFER_LOAD_FORMAT_XYZW)) {
+        m_instruction.control.mtbuf.count = op + 1;
+    } else if (op >= static_cast<u32>(OpcodeMTBUF::TBUFFER_STORE_FORMAT_X) &&
+               op <= static_cast<u32>(OpcodeMTBUF::TBUFFER_STORE_FORMAT_XYZW)) {
+        m_instruction.control.mtbuf.count = op - 3;
+    }
+}
+
+u32 GcnDecodeContext::getMimgModifier(Opcode opcode) {
+    MimgModifierFlags flags = {};
+
+    switch (opcode) {
+    case Opcode::IMAGE_SAMPLE:
+        break;
+    case Opcode::IMAGE_SAMPLE_CL:
+        flags.set(MimgModifier::LodClamp);
+        break;
+    case Opcode::IMAGE_SAMPLE_D:
+        flags.set(MimgModifier::Derivative);
+        break;
+    case Opcode::IMAGE_SAMPLE_D_CL:
+        flags.set(MimgModifier::Derivative, MimgModifier::LodClamp);
+        break;
+    case Opcode::IMAGE_SAMPLE_L:
+        flags.set(MimgModifier::Lod);
+        break;
+    case Opcode::IMAGE_SAMPLE_B:
+        flags.set(MimgModifier::LodBias);
+        break;
+    case Opcode::IMAGE_SAMPLE_B_CL:
+        flags.set(MimgModifier::LodBias, MimgModifier::LodClamp);
+        break;
+    case Opcode::IMAGE_SAMPLE_LZ:
+        flags.set(MimgModifier::Level0);
+        break;
+    case
Opcode::IMAGE_SAMPLE_C: + flags.set(MimgModifier::Pcf); + break; + case Opcode::IMAGE_SAMPLE_C_CL: + flags.set(MimgModifier::Pcf, MimgModifier::LodClamp); + break; + case Opcode::IMAGE_SAMPLE_C_D: + flags.set(MimgModifier::Pcf, MimgModifier::Derivative); + break; + case Opcode::IMAGE_SAMPLE_C_D_CL: + flags.set(MimgModifier::Pcf, MimgModifier::Derivative, MimgModifier::LodClamp); + break; + case Opcode::IMAGE_SAMPLE_C_L: + flags.set(MimgModifier::Pcf, MimgModifier::Lod); + break; + case Opcode::IMAGE_SAMPLE_C_B: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias); + break; + case Opcode::IMAGE_SAMPLE_C_B_CL: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias, MimgModifier::LodClamp); + break; + case Opcode::IMAGE_SAMPLE_C_LZ: + flags.set(MimgModifier::Pcf, MimgModifier::Level0); + break; + case Opcode::IMAGE_SAMPLE_O: + flags.set(MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_CL_O: + flags.set(MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_D_O: + flags.set(MimgModifier::Derivative, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_D_CL_O: + flags.set(MimgModifier::Derivative, MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_L_O: + flags.set(MimgModifier::Lod, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_B_O: + flags.set(MimgModifier::LodBias, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_B_CL_O: + flags.set(MimgModifier::LodBias, MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_LZ_O: + flags.set(MimgModifier::Level0, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_O: + flags.set(MimgModifier::Pcf, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_CL_O: + flags.set(MimgModifier::Pcf, MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_D_O: + flags.set(MimgModifier::Pcf, MimgModifier::Derivative, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_D_CL_O: + flags.set(MimgModifier::Pcf, MimgModifier::Derivative, MimgModifier::LodClamp, + MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_L_O: + flags.set(MimgModifier::Pcf, MimgModifier::Lod, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_B_O: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_B_CL_O: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias, MimgModifier::LodClamp, + MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_LZ_O: + flags.set(MimgModifier::Pcf, MimgModifier::Level0, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4: + break; + case Opcode::IMAGE_GATHER4_CL: + flags.set(MimgModifier::LodClamp); + break; + case Opcode::IMAGE_GATHER4_L: + flags.set(MimgModifier::Lod); + break; + case Opcode::IMAGE_GATHER4_B: + flags.set(MimgModifier::LodBias); + break; + case Opcode::IMAGE_GATHER4_B_CL: + flags.set(MimgModifier::LodBias, MimgModifier::LodClamp); + break; + case Opcode::IMAGE_GATHER4_LZ: + flags.set(MimgModifier::Level0); + break; + case Opcode::IMAGE_GATHER4_C: + flags.set(MimgModifier::Pcf); + break; + case Opcode::IMAGE_GATHER4_C_CL: + flags.set(MimgModifier::Pcf, MimgModifier::LodClamp); + break; + case Opcode::IMAGE_GATHER4_C_L: + flags.set(MimgModifier::Pcf, MimgModifier::Lod); + break; + case Opcode::IMAGE_GATHER4_C_B: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias); + break; + case Opcode::IMAGE_GATHER4_C_B_CL: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias, 
MimgModifier::LodClamp); + break; + case Opcode::IMAGE_GATHER4_C_LZ: + flags.set(MimgModifier::Pcf, MimgModifier::Level0); + break; + case Opcode::IMAGE_GATHER4_O: + flags.set(MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_CL_O: + flags.set(MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_L_O: + flags.set(MimgModifier::Lod, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_B_O: + flags.set(MimgModifier::LodBias, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_B_CL_O: + flags.set(MimgModifier::LodBias, MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_LZ_O: + flags.set(MimgModifier::Level0, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_C_O: + flags.set(MimgModifier::Pcf, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_C_CL_O: + flags.set(MimgModifier::Pcf, MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_C_L_O: + flags.set(MimgModifier::Pcf, MimgModifier::Lod, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_C_B_O: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias, MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_C_B_CL_O: + flags.set(MimgModifier::Pcf, MimgModifier::LodBias, MimgModifier::LodClamp, + MimgModifier::Offset); + break; + case Opcode::IMAGE_GATHER4_C_LZ_O: + flags.set(MimgModifier::Pcf, MimgModifier::Level0, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_CD: + flags.set(MimgModifier::CoarseDerivative); + break; + case Opcode::IMAGE_SAMPLE_CD_CL: + flags.set(MimgModifier::CoarseDerivative, MimgModifier::LodClamp); + break; + case Opcode::IMAGE_SAMPLE_C_CD: + flags.set(MimgModifier::Pcf, MimgModifier::CoarseDerivative); + break; + case Opcode::IMAGE_SAMPLE_C_CD_CL: + flags.set(MimgModifier::Pcf, MimgModifier::CoarseDerivative, MimgModifier::LodClamp); + break; + case Opcode::IMAGE_SAMPLE_CD_O: + flags.set(MimgModifier::CoarseDerivative, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_CD_CL_O: + flags.set(MimgModifier::CoarseDerivative, MimgModifier::LodClamp, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_CD_O: + flags.set(MimgModifier::Pcf, MimgModifier::CoarseDerivative, MimgModifier::Offset); + break; + case Opcode::IMAGE_SAMPLE_C_CD_CL_O: + flags.set(MimgModifier::Pcf, MimgModifier::CoarseDerivative, MimgModifier::LodClamp, + MimgModifier::Offset); + break; + } + + return flags.raw(); +} + +void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) { + u32 op = bit::extract(hexInstruction, 24, 18); + u32 vaddr = bit::extract(hexInstruction, 39, 32); + u32 vdata = bit::extract(hexInstruction, 47, 40); + u32 srsrc = bit::extract(hexInstruction, 52, 48); + u32 ssamp = bit::extract(hexInstruction, 57, 53); + + m_instruction.opcode = static_cast(op + static_cast(OpcodeMap::OP_MAP_MIMG)); + + m_instruction.src[0].field = OperandField::VectorGPR; + m_instruction.src[0].code = vaddr; + m_instruction.src[2].field = OperandField::ScalarGPR; + m_instruction.src[2].code = srsrc; + m_instruction.src[3].field = OperandField::ScalarGPR; + m_instruction.src[3].code = ssamp; + m_instruction.dst[0].field = OperandField::VectorGPR; + m_instruction.dst[0].code = vdata; + + m_instruction.control.mimg = *reinterpret_cast(&hexInstruction); + m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode); +} + +void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) { + OpcodeDS op = (OpcodeDS)bit::extract(hexInstruction, 25, 18); + 
u32 addr = bit::extract(hexInstruction, 39, 32);
+    u32 data0 = bit::extract(hexInstruction, 47, 40);
+    u32 data1 = bit::extract(hexInstruction, 55, 48);
+    u32 vdst = bit::extract(hexInstruction, 63, 56);
+
+    m_instruction.opcode = static_cast<Opcode>(u32(op) + static_cast<u32>(OpcodeMap::OP_MAP_DS));
+
+    m_instruction.src[0].field = OperandField::VectorGPR;
+    m_instruction.src[0].code = addr;
+    m_instruction.src[1].field = OperandField::VectorGPR;
+    m_instruction.src[1].code = data0;
+    m_instruction.src[2].field = OperandField::VectorGPR;
+    m_instruction.src[2].code = data1;
+    m_instruction.dst[0].field = OperandField::VectorGPR;
+    m_instruction.dst[0].code = vdst;
+    m_instruction.dst_count = 1;
+
+    m_instruction.control.ds = *reinterpret_cast<InstControlDS*>(&hexInstruction);
+
+    auto instFormat = InstructionFormat(InstEncoding::DS, (u32)op);
+
+    m_instruction.control.ds.dual =
+        op == OpcodeDS::DS_WRITE2_B32 || op == OpcodeDS::DS_WRXCHG2_RTN_B32 ||
+        op == OpcodeDS::DS_READ2_B32 || op == OpcodeDS::DS_WRITE2_B64 ||
+        op == OpcodeDS::DS_WRXCHG2_RTN_B64 || op == OpcodeDS::DS_READ2_B64;
+
+    m_instruction.control.ds.sign = instFormat.src_type == ScalarType::Sint32;
+
+    m_instruction.control.ds.relative =
+        op >= OpcodeDS::DS_ADD_SRC2_U32 && op <= OpcodeDS::DS_MAX_SRC2_F64;
+
+    m_instruction.control.ds.stride =
+        op == OpcodeDS::DS_WRITE2ST64_B32 || op == OpcodeDS::DS_WRXCHG2ST64_RTN_B32 ||
+        op == OpcodeDS::DS_READ2ST64_B32 || op == OpcodeDS::DS_WRITE2ST64_B64 ||
+        op == OpcodeDS::DS_WRXCHG2ST64_RTN_B64 || op == OpcodeDS::DS_READ2ST64_B64;
+
+    if (op == OpcodeDS::DS_WRITE_B8 || op == OpcodeDS::DS_READ_I8 || op == OpcodeDS::DS_READ_U8) {
+        m_instruction.control.ds.size = 1;
+    } else if (op == OpcodeDS::DS_WRITE_B16 || op == OpcodeDS::DS_READ_I16 ||
+               op == OpcodeDS::DS_READ_U16) {
+        m_instruction.control.ds.size = 2;
+    } else {
+        if (instFormat.src_type == ScalarType::Sint32 ||
+            instFormat.src_type == ScalarType::Uint32) {
+            m_instruction.control.ds.size = 4;
+        } else if (instFormat.src_type == ScalarType::Sint64 ||
+                   instFormat.src_type == ScalarType::Uint64) {
+            m_instruction.control.ds.size = 8;
+        } else {
+            m_instruction.control.ds.size = 0;
+        }
+    }
+}
+
+void GcnDecodeContext::decodeInstructionEXP(uint64_t hexInstruction) {
+    u32 vsrc0 = bit::extract(hexInstruction, 39, 32);
+    u32 vsrc1 = bit::extract(hexInstruction, 47, 40);
+    u32 vsrc2 = bit::extract(hexInstruction, 55, 48);
+    u32 vsrc3 = bit::extract(hexInstruction, 63, 56);
+
+    m_instruction.opcode = Opcode::EXP;
+
+    m_instruction.src[0].field = OperandField::VectorGPR;
+    m_instruction.src[0].code = vsrc0;
+    m_instruction.src[1].field = OperandField::VectorGPR;
+    m_instruction.src[1].code = vsrc1;
+    m_instruction.src[2].field = OperandField::VectorGPR;
+    m_instruction.src[2].code = vsrc2;
+    m_instruction.src[3].field = OperandField::VectorGPR;
+    m_instruction.src[3].code = vsrc3;
+
+    m_instruction.control.exp = *reinterpret_cast<InstControlEXP*>(&hexInstruction);
+}
+
+} // namespace Shader::Gcn
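Putting the pieces together, a caller can decode a whole program by looping the context over a code slice; decodeInstruction consumes one full instruction per call, including any trailing literal. A minimal sketch (the DecodeProgram helper is invented for illustration):

#include <vector>

#include "common/types.h"
#include "shader_recompiler/frontend/decode.h"

std::vector<Shader::Gcn::GcnInst> DecodeProgram(const u32* code, u32 num_dwords) {
    Shader::Gcn::GcnCodeSlice slice{code, code + num_dwords};
    Shader::Gcn::GcnDecodeContext decoder;
    std::vector<Shader::Gcn::GcnInst> insts;
    while (!slice.atEnd()) {
        insts.push_back(decoder.decodeInstruction(slice));
    }
    return insts;
}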
diff --git a/src/shader_recompiler/frontend/decode.h b/src/shader_recompiler/frontend/decode.h
new file mode 100644
index 00000000..8125ce7f
--- /dev/null
+++ b/src/shader_recompiler/frontend/decode.h
@@ -0,0 +1,97 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "shader_recompiler/frontend/instruction.h"
+
+namespace Shader::Gcn {
+
+struct InstFormat {
+    InstClass inst_class = InstClass::Undefined;
+    InstCategory inst_category = InstCategory::Undefined;
+    u32 src_count = 0;
+    u32 dst_count = 0;
+    ScalarType src_type = ScalarType::Undefined;
+    ScalarType dst_type = ScalarType::Undefined;
+};
+
+InstEncoding GetInstructionEncoding(u32 token);
+
+u32 GetEncodingLength(InstEncoding encoding);
+
+InstFormat InstructionFormat(InstEncoding encoding, u32 opcode);
+
+Opcode DecodeOpcode(u32 token);
+
+class GcnCodeSlice {
+public:
+    GcnCodeSlice(const u32* ptr, const u32* end) : m_ptr(ptr), m_end(end) {}
+    GcnCodeSlice(const GcnCodeSlice& other) = default;
+    ~GcnCodeSlice() = default;
+
+    u32 at(u32 id) const {
+        return m_ptr[id];
+    }
+
+    u32 readu32() {
+        return *(m_ptr++);
+    }
+
+    u64 readu64() {
+        const u64 value = *(u64*)m_ptr;
+        m_ptr += 2;
+        return value;
+    }
+
+    bool atEnd() const {
+        return m_ptr == m_end;
+    }
+
+private:
+    const u32* m_ptr{};
+    const u32* m_end{};
+};
+
+class GcnDecodeContext {
+public:
+    GcnInst decodeInstruction(GcnCodeSlice& code);
+
+private:
+    uint32_t getEncodingLength(InstEncoding encoding);
+    uint32_t getOpMapOffset(InstEncoding encoding);
+    uint32_t mapEncodingOp(InstEncoding encoding, Opcode opcode);
+    void updateInstructionMeta(InstEncoding encoding);
+    uint32_t getMimgModifier(Opcode opcode);
+    void repairOperandType();
+
+    OperandField getOperandField(uint32_t code);
+
+    void decodeInstruction32(InstEncoding encoding, GcnCodeSlice& code);
+    void decodeInstruction64(InstEncoding encoding, GcnCodeSlice& code);
+    void decodeLiteralConstant(InstEncoding encoding, GcnCodeSlice& code);
+
+    // 32-bit encodings
+    void decodeInstructionSOP1(uint32_t hexInstruction);
+    void decodeInstructionSOPP(uint32_t hexInstruction);
+    void decodeInstructionSOPC(uint32_t hexInstruction);
+    void decodeInstructionSOPK(uint32_t hexInstruction);
+    void decodeInstructionSOP2(uint32_t hexInstruction);
+    void decodeInstructionVOP1(uint32_t hexInstruction);
+    void decodeInstructionVOPC(uint32_t hexInstruction);
+    void decodeInstructionVOP2(uint32_t hexInstruction);
+    void decodeInstructionSMRD(uint32_t hexInstruction);
+    void decodeInstructionVINTRP(uint32_t hexInstruction);
+    // 64-bit encodings
+    void decodeInstructionVOP3(uint64_t hexInstruction);
+    void decodeInstructionMUBUF(uint64_t hexInstruction);
+    void decodeInstructionMTBUF(uint64_t hexInstruction);
+    void decodeInstructionMIMG(uint64_t hexInstruction);
+    void decodeInstructionDS(uint64_t hexInstruction);
+    void decodeInstructionEXP(uint64_t hexInstruction);
+
+private:
+    GcnInst m_instruction;
+};
+
+} // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp
new file mode 100644
index 00000000..379ed85f
--- /dev/null
+++ b/src/shader_recompiler/frontend/format.cpp
@@ -0,0 +1,3733 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/assert.h"
+#include "shader_recompiler/frontend/decode.h"
+
+namespace Shader::Gcn {
+
+constexpr std::array<InstFormat, 45> InstructionFormatSOP2 = {{
+    // 0 = S_ADD_U32
+    {InstClass::ScalarArith, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32},
+    // 1 = S_SUB_U32
+    {InstClass::ScalarArith, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32},
+    // 2 = S_ADD_I32
+    {InstClass::ScalarArith, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32},
+    // 3 = S_SUB_I32
+    {InstClass::ScalarArith, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32},
+    // 4 = S_ADDC_U32
+    {InstClass::ScalarArith, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32},
+    // 5 = S_SUBB_U32
+ {InstClass::ScalarArith, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 6 = S_MIN_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 7 = S_MIN_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 8 = S_MAX_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 9 = S_MAX_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 10 = S_CSELECT_B32 + {InstClass::ScalarSelect, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 11 = S_CSELECT_B64 + {InstClass::ScalarSelect, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + {}, + {}, + // 14 = S_AND_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 15 = S_AND_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 16 = S_OR_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 17 = S_OR_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 18 = S_XOR_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 19 = S_XOR_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 20 = S_ANDN2_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 21 = S_ANDN2_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 22 = S_ORN2_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 23 = S_ORN2_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 24 = S_NAND_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 25 = S_NAND_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 26 = S_NOR_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 27 = S_NOR_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 28 = S_XNOR_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 29 = S_XNOR_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 30 = S_LSHL_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 31 = S_LSHL_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 32 = S_LSHR_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 33 = S_LSHR_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 34 = S_ASHR_I32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 35 = S_ASHR_I64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 2, 1, ScalarType::Sint64, + 
ScalarType::Sint64}, + // 36 = S_BFM_B32 + {InstClass::ScalarBitField, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 37 = S_BFM_B64 + {InstClass::ScalarBitField, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 38 = S_MUL_I32 + {InstClass::ScalarArith, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 39 = S_BFE_U32 + {InstClass::ScalarBitField, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 40 = S_BFE_I32 + {InstClass::ScalarBitField, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 41 = S_BFE_U64 + {InstClass::ScalarBitField, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 42 = S_BFE_I64 + {InstClass::ScalarBitField, InstCategory::ScalarALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 43 = S_CBRANCH_G_FORK + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 2, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 44 = S_ABSDIFF_I32 + {InstClass::ScalarAbs, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, +}}; + +constexpr std::array InstructionFormatSOPK = {{ + // 0 = S_MOVK_I32 + {InstClass::ScalarMov, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + {}, + // 2 = S_CMOVK_I32 + {InstClass::ScalarMov, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 3 = S_CMPK_EQ_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 4 = S_CMPK_LI32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 5 = S_CMPK_GT_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 6 = S_CMPK_GE_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 7 = S_CMPK_LT_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 8 = S_CMPK_LE_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 9 = S_CMPK_EQ_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 10 = S_CMPK_LG_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 11 = S_CMPK_GT_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 12 = S_CMPK_GE_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 13 = S_CMPK_LT_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 14 = S_CMPK_LE_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 0, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 15 = S_ADDK_I32 + {InstClass::ScalarArith, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 16 = S_MULK_I32 + {InstClass::ScalarArith, InstCategory::ScalarALU, 0, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 17 = S_CBRANCH_I_FORK + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 18 = S_GETREG_B32 + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 19 = S_SETREG_B32 + {InstClass::ScalarRegAccess, InstCategory::FlowControl, 0, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + // 21 = S_SETREIMM32_B32 
+ {InstClass::ScalarRegAccess, InstCategory::FlowControl, 0, 1, ScalarType::Uint32, + ScalarType::Uint32}, +}}; + +constexpr std::array InstructionFormatSOP1 = {{ + {}, + {}, + {}, + // 3 = S_MOV_B32 + {InstClass::ScalarMov, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 4 = S_MOV_B64 + {InstClass::ScalarMov, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 5 = S_CMOV_B32 + {InstClass::ScalarMov, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 6 = S_CMOV_B64 + {InstClass::ScalarMov, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 7 = S_NOT_B32 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 8 = S_NOT_B64 + {InstClass::ScalarBitLogic, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 9 = S_WQM_B32 + {InstClass::ScalarQuadMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 10 = S_WQM_B64 + {InstClass::ScalarQuadMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 11 = S_BREV_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 12 = S_BREV_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 13 = S_BCNT0_I32_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Sint32}, + // 14 = S_BCNT0_I32_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Sint32}, + // 15 = S_BCNT1_I32_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Sint32}, + // 16 = S_BCNT1_I32_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Sint32}, + // 17 = S_FF0_I32_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Sint32}, + // 18 = S_FF0_I32_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Sint32}, + // 19 = S_FF1_I32_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Sint32}, + // 20 = S_FF1_I32_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Sint32}, + // 21 = S_FLBIT_I32_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Sint32}, + // 22 = S_FLBIT_I32_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Sint32}, + // 23 = S_FLBIT_I32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 24 = S_FLBIT_I32_I64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Sint64, + ScalarType::Sint32}, + // 25 = S_SEXT_I32_I8 + {InstClass::ScalarConv, InstCategory::ScalarALU, 1, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 26 = S_SEXT_I32_I16 + {InstClass::ScalarConv, InstCategory::ScalarALU, 1, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 27 = S_BITSET0_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 28 = S_BITSET0_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 29 = S_BITSET1_B32 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + 
ScalarType::Uint32}, + // 30 = S_BITSET1_B64 + {InstClass::ScalarBitManip, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 31 = S_GETPC_B64 + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 32 = S_SETPC_B64 + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 33 = S_SWAPPC_B64 + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 34 = S_RFE_B64 + {InstClass::Undefined, InstCategory::Undefined, 1, 1, ScalarType::Uint64, ScalarType::Uint64}, + {}, + // 36 = S_AND_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 37 = S_OR_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 38 = S_XOR_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 39 = S_ANDN2_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 40 = S_ORN2_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 41 = S_NAND_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 42 = S_NOR_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 43 = S_XNOR_SAVEEXEC_B64 + {InstClass::ScalarExecMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 44 = S_QUADMASK_B32 + {InstClass::ScalarQuadMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 45 = S_QUADMASK_B64 + {InstClass::ScalarQuadMask, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 46 = S_MOVRELS_B32 + {InstClass::ScalarMovRel, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 47 = S_MOVRELS_B64 + {InstClass::ScalarMovRel, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 48 = S_MOVRELD_B32 + {InstClass::ScalarMovRel, InstCategory::ScalarALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 49 = S_MOVRELD_B64 + {InstClass::ScalarMovRel, InstCategory::ScalarALU, 1, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 50 = S_CBRANCH_JOIN + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + // 52 = S_ABS_I32 + {InstClass::ScalarAbs, InstCategory::ScalarALU, 1, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 53 = S_MOV_FED_B32 + {InstClass::Undefined, InstCategory::Undefined, 1, 1, ScalarType::Uint32, ScalarType::Uint32}, +}}; + +constexpr std::array InstructionFormatSOPC = {{ + // 0 = S_CMP_EQ_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 1 = S_CMP_LI32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 2 = S_CMP_GT_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 3 = S_CMP_GE_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 4 = S_CMP_LT_I32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 5 = S_CMP_LE_I32 + 
{InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 6 = S_CMP_EQ_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 7 = S_CMP_LG_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 8 = S_CMP_GT_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 9 = S_CMP_GE_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 10 = S_CMP_LT_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 11 = S_CMP_LE_U32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 12 = S_BITCMP0_B32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 13 = S_BITCMP1_B32 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 14 = S_BITCMP0_B64 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 15 = S_BITCMP1_B64 + {InstClass::ScalarCmp, InstCategory::ScalarALU, 2, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 16 = S_SETVSKIP + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 2, 1, ScalarType::Undefined, + ScalarType::Undefined}, +}}; + +constexpr std::array InstructionFormatSOPP = {{ + // 0 = S_NOP + {InstClass::ScalarWait, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 1 = S_ENDPGM + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 2 = S_BRANCH + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + {}, + // 4 = S_CBRANCH_SCC0 + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 5 = S_CBRANCH_SCC1 + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 6 = S_CBRANCH_VCCZ + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 7 = S_CBRANCH_VCCNZ + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 8 = S_CBRANCH_EXECZ + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 9 = S_CBRANCH_EXECNZ + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 10 = S_BARRIER + {InstClass::ScalarSync, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + {}, + // 12 = S_WAITCNT + {InstClass::ScalarSync, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 13 = S_SETHALT + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any}, + // 14 = S_SLEEP + {InstClass::ScalarSync, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 15 = S_SETPRIO + {InstClass::ScalarSync, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 16 = S_SENDMSG + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 17 = S_SENDMSGHALT + {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 18 = S_TRAP + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any}, + // 19 = S_ICACHE_INV + {InstClass::ScalarCache, 
InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, + // 20 = S_INCPERFLEVEL + {InstClass::DbgProf, InstCategory::DebugProfile, 0, 1, ScalarType::Any, ScalarType::Any}, + // 21 = S_DECPERFLEVEL + {InstClass::DbgProf, InstCategory::DebugProfile, 0, 1, ScalarType::Any, ScalarType::Any}, + // 22 = S_TTRACEDATA + {InstClass::DbgProf, InstCategory::DebugProfile, 0, 1, ScalarType::Any, ScalarType::Any}, + // 23 = S_CBRANCH_CDBGSYS + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any}, + // 24 = S_CBRANCH_CDBGUSER + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any}, + // 25 = S_CBRANCH_CDBGSYS_OR_USER + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any}, + // 26 = S_CBRANCH_CDBGSYS_AND_USER + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any}, +}}; + +constexpr std::array InstructionFormatSMRD = {{ + // 0 = S_LOAD_DWORD + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 1 = S_LOAD_DWORDX2 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 2 = S_LOAD_DWORDX4 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 3 = S_LOAD_DWORDX8 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 4 = S_LOAD_DWORDX16 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + {}, + {}, + // 8 = S_BUFFER_LOAD_DWORD + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 9 = S_BUFFER_LOAD_DWORDX2 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 10 = S_BUFFER_LOAD_DWORDX4 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 11 = S_BUFFER_LOAD_DWORDX8 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 12 = S_BUFFER_LOAD_DWORDX16 + {InstClass::ScalarMemRd, InstCategory::ScalarMemory, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 29 = S_DCACHE_INV_VOL + {InstClass::ScalarMemUt, InstCategory::ScalarMemory, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 30 = S_MEMTIME + {InstClass::ScalarMemUt, InstCategory::ScalarMemory, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 31 = S_DCACHE_INV + {InstClass::ScalarMemUt, InstCategory::ScalarMemory, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, +}}; + +constexpr std::array InstructionFormatVOP2 = {{ + // 0 = V_CNDMASK_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 1 = V_READLANE_B32 + {InstClass::VectorLane, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 2 = V_WRITELANE_B32 + {InstClass::VectorLane, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 3 = V_ADD_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 4 = V_SUB_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 5 = V_SUBREV_F32 + {InstClass::VectorFpArith32, 
InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 6 = V_MAC_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 7 = V_MUL_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 8 = V_MUL_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 9 = V_MUL_I32_I24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 10 = V_MUL_HI_I32_I24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 11 = V_MUL_U32_U24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 12 = V_MUL_HI_U32_U24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 13 = V_MIN_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 14 = V_MAX_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 15 = V_MIN_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 16 = V_MAX_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 17 = V_MIN_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 18 = V_MAX_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 19 = V_MIN_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 20 = V_MAX_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 21 = V_LSHR_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 22 = V_LSHRREV_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 23 = V_ASHR_I32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 24 = V_ASHRREV_I32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 25 = V_LSHL_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 26 = V_LSHLREV_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 27 = V_AND_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 28 = V_OR_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 29 = V_XOR_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 30 = V_BFM_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 31 = V_MAC_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 32 = V_MADMK_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 33 = 
V_MADAK_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 34 = V_BCNT_U32_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 35 = V_MBCNT_LO_U32_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 36 = V_MBCNT_HI_U32_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 37 = V_ADD_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 38 = V_SUB_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 39 = V_SUBREV_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 40 = V_ADDC_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 41 = V_SUBB_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 42 = V_SUBBREV_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 43 = V_LDEXP_F32 + {InstClass::VectorFpField32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 44 = V_CVT_PKACCUM_U8_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Uint32}, + // 45 = V_CVT_PKNORM_I16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Sint32}, + // 46 = V_CVT_PKNORM_U16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Uint32}, + // 47 = V_CVT_PKRTZ_F16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Uint32}, + // 48 = V_CVT_PK_U16_U32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 49 = V_CVT_PK_I16_I32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, +}}; + +constexpr std::array InstructionFormatVOP3 = {{ + // 0 = V_CMP_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 1 = V_CMP_LT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 2 = V_CMP_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 3 = V_CMP_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 4 = V_CMP_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 5 = V_CMP_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 6 = V_CMP_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 7 = V_CMP_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 8 = V_CMP_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 9 = V_CMP_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 10 = V_CMP_NLG_F32 + 
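+ // These per-encoding arrays are presumably consumed by an opcode-indexed
+ // lookup once the decoder has identified the encoding family. A hedged
+ // sketch (InstEncoding, GetFormat, and the InstFormat struct sketched
+ // earlier are hypothetical names, not part of this patch):
+ //
+ //   const InstFormat& GetFormat(InstEncoding enc, u32 opcode) {
+ //       switch (enc) {
+ //       case InstEncoding::SOP1: return InstructionFormatSOP1[opcode];
+ //       case InstEncoding::SOPC: return InstructionFormatSOPC[opcode];
+ //       case InstEncoding::SOPP: return InstructionFormatSOPP[opcode];
+ //       case InstEncoding::SMRD: return InstructionFormatSMRD[opcode];
+ //       case InstEncoding::VOP2: return InstructionFormatVOP2[opcode];
+ //       case InstEncoding::VOP3: return InstructionFormatVOP3[opcode];
+ //       default: UNREACHABLE();
+ //       }
+ //   }
+ //
+ // Empty {} rows stand for opcode slots the table does not (yet) describe,
+ // so a real lookup would also want to reject entries left at their
+ // default-constructed values.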
{InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 11 = V_CMP_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 12 = V_CMP_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 13 = V_CMP_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 14 = V_CMP_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 15 = V_CMP_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 16 = V_CMPX_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 17 = V_CMPX_LT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 18 = V_CMPX_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 19 = V_CMPX_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 20 = V_CMPX_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 21 = V_CMPX_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 22 = V_CMPX_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 23 = V_CMPX_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 24 = V_CMPX_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 25 = V_CMPX_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 26 = V_CMPX_NLG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 27 = V_CMPX_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 28 = V_CMPX_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 29 = V_CMPX_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 30 = V_CMPX_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 31 = V_CMPX_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 32 = V_CMP_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 33 = V_CMP_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 34 = V_CMP_EQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 35 = V_CMP_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 36 = V_CMP_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 37 = V_CMP_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, 
ScalarType::Float64, + ScalarType::Float64}, + // 38 = V_CMP_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 39 = V_CMP_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 40 = V_CMP_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 41 = V_CMP_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 42 = V_CMP_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 43 = V_CMP_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 44 = V_CMP_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 45 = V_CMP_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 46 = V_CMP_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 47 = V_CMP_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 48 = V_CMPX_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 49 = V_CMPX_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 50 = V_CMPX_EQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 51 = V_CMPX_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 52 = V_CMPX_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 53 = V_CMPX_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 54 = V_CMPX_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 55 = V_CMPX_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 56 = V_CMPX_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 57 = V_CMPX_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 58 = V_CMPX_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 59 = V_CMPX_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 60 = V_CMPX_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 61 = V_CMPX_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 62 = V_CMPX_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 63 = V_CMPX_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 64 = V_CMPS_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 65 = V_CMPS_LT_F32 + 
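+ // A pattern worth noting, read off the indices in this table: the first
+ // 256 VOP3 slots mirror the VOPC compare space, VOP2-style ops reappear at
+ // 256 + vop2_opcode (V_CNDMASK_B32 sits at both VOP2 0 and VOP3 256), and
+ // VOP1-style ops at 384 + vop1_opcode (V_NOP at VOP3 384). As a hedged
+ // sketch of that mapping:
+ //
+ //   constexpr u32 Vop2ToVop3(u32 vop2_op) { return vop2_op + 256; }
+ //   constexpr u32 Vop1ToVop3(u32 vop1_op) { return vop1_op + 384; }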
{InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 66 = V_CMPS_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 67 = V_CMPS_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 68 = V_CMPS_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 69 = V_CMPS_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 70 = V_CMPS_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 71 = V_CMPS_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 72 = V_CMPS_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 73 = V_CMPS_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 74 = V_CMPS_NLG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 75 = V_CMPS_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 76 = V_CMPS_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 77 = V_CMPS_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 78 = V_CMPS_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 79 = V_CMPS_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 80 = V_CMPSX_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 81 = V_CMPSX_LT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 82 = V_CMPSX_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 83 = V_CMPSX_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 84 = V_CMPSX_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 85 = V_CMPSX_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 86 = V_CMPSX_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 87 = V_CMPSX_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 88 = V_CMPSX_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 89 = V_CMPSX_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 90 = V_CMPSX_NLG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 91 = V_CMPSX_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 92 = V_CMPSX_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 
2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 93 = V_CMPSX_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 94 = V_CMPSX_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 95 = V_CMPSX_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 96 = V_CMPS_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 97 = V_CMPS_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 98 = V_CMPS_EQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 99 = V_CMPS_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 100 = V_CMPS_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 101 = V_CMPS_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 102 = V_CMPS_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 103 = V_CMPS_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 104 = V_CMPS_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 105 = V_CMPS_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 106 = V_CMPS_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 107 = V_CMPS_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 108 = V_CMPS_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 109 = V_CMPS_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 110 = V_CMPS_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 111 = V_CMPS_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 112 = V_CMPSX_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 113 = V_CMPSX_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 114 = V_CMPSX_EQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 115 = V_CMPSX_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 116 = V_CMPSX_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 117 = V_CMPSX_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 118 = V_CMPSX_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 119 = V_CMPSX_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + 
ScalarType::Float64}, + // 120 = V_CMPSX_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 121 = V_CMPSX_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 122 = V_CMPSX_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 123 = V_CMPSX_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 124 = V_CMPSX_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 125 = V_CMPSX_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 126 = V_CMPSX_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 127 = V_CMPSX_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 128 = V_CMP_F_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 129 = V_CMP_LT_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 130 = V_CMP_EQ_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 131 = V_CMP_LE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 132 = V_CMP_GT_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 133 = V_CMP_NE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 134 = V_CMP_GE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 135 = V_CMP_T_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 136 = V_CMP_CLASS_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 144 = V_CMPX_F_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 145 = V_CMPX_LT_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 146 = V_CMPX_EQ_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 147 = V_CMPX_LE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 148 = V_CMPX_GT_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 149 = V_CMPX_NE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 150 = V_CMPX_GE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 151 = V_CMPX_T_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 152 = V_CMPX_CLASS_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 160 = V_CMP_F_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 
2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 161 = V_CMP_LT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 162 = V_CMP_EQ_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 163 = V_CMP_LE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 164 = V_CMP_GT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 165 = V_CMP_NE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 166 = V_CMP_GE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 167 = V_CMP_T_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 168 = V_CMP_CLASS_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 176 = V_CMPX_F_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 177 = V_CMPX_LT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 178 = V_CMPX_EQ_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 179 = V_CMPX_LE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 180 = V_CMPX_GT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 181 = V_CMPX_NE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 182 = V_CMPX_GE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 183 = V_CMPX_T_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 184 = V_CMPX_CLASS_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 192 = V_CMP_F_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 193 = V_CMP_LT_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 194 = V_CMP_EQ_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 195 = V_CMP_LE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 196 = V_CMP_GT_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 197 = V_CMP_NE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 198 = V_CMP_GE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 199 = V_CMP_T_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 208 = V_CMPX_F_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 209 = V_CMPX_LT_U32 + 
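+ // Within the 0-255 compare block, the *X variants sit 16 slots above their
+ // plain counterparts in every 32-opcode family (e.g. V_CMP_LT_U32 at 193,
+ // V_CMPX_LT_U32 at 209); on GCN the X forms additionally write EXEC. The
+ // table does not encode that property, but it follows from the index
+ // alone. A hedged sketch:
+ //
+ //   constexpr bool WritesExec(u32 vopc_op) {
+ //       return (vopc_op & 16) != 0; // holds for compare opcodes below 256
+ //   }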
{InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 210 = V_CMPX_EQ_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 211 = V_CMPX_LE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 212 = V_CMPX_GT_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 213 = V_CMPX_NE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 214 = V_CMPX_GE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 215 = V_CMPX_T_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 224 = V_CMP_F_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 225 = V_CMP_LT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 226 = V_CMP_EQ_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 227 = V_CMP_LE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 228 = V_CMP_GT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 229 = V_CMP_NE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 230 = V_CMP_GE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 231 = V_CMP_T_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 240 = V_CMPX_F_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 241 = V_CMPX_LT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 242 = V_CMPX_EQ_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 243 = V_CMPX_LE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 244 = V_CMPX_GT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 245 = V_CMPX_NE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 246 = V_CMPX_GE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 247 = V_CMPX_T_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 256 = V_CNDMASK_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 257 = V_READLANE_B32 + {InstClass::VectorLane, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 258 = V_WRITELANE_B32 + {InstClass::VectorLane, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 259 = V_ADD_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, 
+ ScalarType::Float32}, + // 260 = V_SUB_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 261 = V_SUBREV_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 262 = V_MAC_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 263 = V_MUL_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 264 = V_MUL_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 265 = V_MUL_I32_I24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 266 = V_MUL_HI_I32_I24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 267 = V_MUL_U32_U24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 268 = V_MUL_HI_U32_U24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 269 = V_MIN_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 270 = V_MAX_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 271 = V_MIN_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 272 = V_MAX_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 273 = V_MIN_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 274 = V_MAX_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 275 = V_MIN_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 276 = V_MAX_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 277 = V_LSHR_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 278 = V_LSHRREV_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 279 = V_ASHR_I32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 280 = V_ASHRREV_I32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 281 = V_LSHL_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 282 = V_LSHLREV_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 283 = V_AND_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 284 = V_OR_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 285 = V_XOR_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 286 = V_BFM_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 287 = V_MAC_F32 + 
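+ // The index comments make the intended table sizes easy to cross-check.
+ // A few sanity asserts one might add, with the sizes read off the final
+ // index in each completed table above (a hedged addition, not part of the
+ // patch):
+ //
+ //   static_assert(InstructionFormatSOP1.size() == 54); // last index 53
+ //   static_assert(InstructionFormatSOPC.size() == 17); // last index 16
+ //   static_assert(InstructionFormatSOPP.size() == 27); // last index 26
+ //   static_assert(InstructionFormatSMRD.size() == 32); // last index 31
+ //   static_assert(InstructionFormatVOP2.size() == 50); // last index 49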
{InstClass::VectorFpArith32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 288 = V_MADMK_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 289 = V_MADAK_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 290 = V_BCNT_U32_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 291 = V_MBCNT_LO_U32_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 292 = V_MBCNT_HI_U32_B32 + {InstClass::VectorThreadMask, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 293 = V_ADD_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 294 = V_SUB_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 295 = V_SUBREV_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 296 = V_ADDC_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 297 = V_SUBB_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 298 = V_SUBBREV_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 299 = V_LDEXP_F32 + {InstClass::VectorFpField32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 300 = V_CVT_PKACCUM_U8_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Uint32}, + // 301 = V_CVT_PKNORM_I16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Sint32}, + // 302 = V_CVT_PKNORM_U16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Uint32}, + // 303 = V_CVT_PKRTZ_F16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Float32, ScalarType::Uint32}, + // 304 = V_CVT_PK_U16_U32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 305 = V_CVT_PK_I16_I32 + {InstClass::VectorConv, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, ScalarType::Sint32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 320 = V_MAD_LEGACY_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 321 = V_MAD_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 322 = V_MAD_I32_I24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 323 = V_MAD_U32_U24 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 324 = V_CUBEID_F32 + {InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 325 = V_CUBESC_F32 + {InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 326 = V_CUBETC_F32 + {InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 327 = V_CUBEMA_F32 + {InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, 
+ ScalarType::Float32}, + // 328 = V_BFE_U32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 329 = V_BFE_I32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 330 = V_BFI_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 331 = V_FMA_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 332 = V_FMA_F64 + {InstClass::VectorFpArith64, InstCategory::VectorALU, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 333 = V_LERP_U8 + {InstClass::VectorIntGraph, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 334 = V_ALIGNBIT_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 335 = V_ALIGNBYTE_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 336 = V_MULLIT_F32 + {InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 337 = V_MIN3_F32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 338 = V_MIN3_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 339 = V_MIN3_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 340 = V_MAX3_F32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 341 = V_MAX3_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 342 = V_MAX3_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 343 = V_MED3_F32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 344 = V_MED3_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 345 = V_MED3_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 346 = V_SAD_U8 + {InstClass::VectorIntGraph, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 347 = V_SAD_HI_U8 + {InstClass::VectorIntGraph, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 348 = V_SAD_U16 + {InstClass::VectorIntGraph, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 349 = V_SAD_U32 + {InstClass::VectorIntGraph, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 350 = V_CVT_PK_U8_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 3, 1, ScalarType::Float32, ScalarType::Uint32}, + // 351 = V_DIV_FIXUP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 352 = V_DIV_FIXUP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 353 = V_LSHL_B64 + {InstClass::VectorBitField64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 354 = V_LSHR_B64 + {InstClass::VectorBitField64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 355 = V_ASHR_I64 + {InstClass::VectorBitField64, 
InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 356 = V_ADD_F64 + {InstClass::VectorFpArith64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 357 = V_MUL_F64 + {InstClass::VectorFpArith64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 358 = V_MIN_F64 + {InstClass::VectorFpArith64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 359 = V_MAX_F64 + {InstClass::VectorFpArith64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 360 = V_LDEXP_F64 + {InstClass::VectorFpField64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 361 = V_MUL_LO_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 362 = V_MUL_HI_U32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 363 = V_MUL_LO_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 364 = V_MUL_HI_I32 + {InstClass::VectorIntArith32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 365 = V_DIV_SCALE_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 366 = V_DIV_SCALE_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 367 = V_DIV_FMAS_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 368 = V_DIV_FMAS_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 369 = V_MSAD_U8 + {InstClass::VectorIntGraph, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 370 = V_QSAD_U8 + {InstClass::Undefined, InstCategory::Undefined, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 371 = V_MQSAD_U8 + {InstClass::Undefined, InstCategory::Undefined, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 372 = V_TRIG_PREOP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 373 = V_MQSAD_U32_U8 + {InstClass::VectorIntGraph, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 374 = V_MAD_U64_U32 + {InstClass::VectorIntArith64, InstCategory::VectorALU, 3, 1, ScalarType::Uint32, + ScalarType::Uint64}, + // 375 = V_MAD_I64_I32 + {InstClass::VectorIntArith64, InstCategory::VectorALU, 3, 1, ScalarType::Sint32, + ScalarType::Sint64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 384 = V_NOP + {InstClass::VectorMisc, InstCategory::VectorALU, 0, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 385 = V_MOV_B32 + {InstClass::VectorRegMov, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 386 = V_READFIRSTLANE_B32 + {InstClass::VectorLane, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 387 = V_CVT_I32_F64 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Sint32}, + // 388 = V_CVT_F64_I32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, ScalarType::Float64}, + // 389 = V_CVT_F32_I32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, + ScalarType::Float32}, + // 390 = V_CVT_F32_U32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, 
ScalarType::Uint32, + ScalarType::Float32}, + // 391 = V_CVT_U32_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Uint32}, + // 392 = V_CVT_I32_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Sint32}, + // 393 = V_MOV_FED_B32 + {InstClass::Undefined, InstCategory::Undefined, 1, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 394 = V_CVT_F16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float16}, + // 395 = V_CVT_F32_F16 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float16, + ScalarType::Float32}, + // 396 = V_CVT_RPI_I32_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Sint32}, + // 397 = V_CVT_FLR_I32_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Sint32}, + // 398 = V_CVT_OFF_F32_I4 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, ScalarType::Float32}, + // 399 = V_CVT_F32_F64 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float32}, + // 400 = V_CVT_F64_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float64}, + // 401 = V_CVT_F32_UBYTE0 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 402 = V_CVT_F32_UBYTE1 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 403 = V_CVT_F32_UBYTE2 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 404 = V_CVT_F32_UBYTE3 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 405 = V_CVT_U32_F64 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Uint32}, + // 406 = V_CVT_F64_U32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 416 = V_FRACT_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 417 = V_TRUNC_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 418 = V_CEIL_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 419 = V_RNDNE_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 420 = V_FLOOR_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 421 = V_EXP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 422 = V_LOG_CLAMP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 423 = V_LOG_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 424 = V_RCP_CLAMP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 425 = V_RCP_LEGACY_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 426 = V_RCP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, 
ScalarType::Float32, + ScalarType::Float32}, + // 427 = V_RCP_IFLAG_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 428 = V_RSQ_CLAMP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 429 = V_RSQ_LEGACY_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 430 = V_RSQ_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 431 = V_RCP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 432 = V_RCP_CLAMP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 433 = V_RSQ_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 434 = V_RSQ_CLAMP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 435 = V_SQRT_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 436 = V_SQRT_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 437 = V_SIN_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 438 = V_COS_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 439 = V_NOT_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 440 = V_BFREV_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 441 = V_FFBH_U32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 442 = V_FFBL_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 443 = V_FFBH_I32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 444 = V_FREXP_EXP_I32_F64 + {InstClass::VectorFpField64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Sint32}, + // 445 = V_FREXP_MANT_F64 + {InstClass::VectorFpField64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 446 = V_FRACT_F64 + {InstClass::VectorFpRound64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 447 = V_FREXP_EXP_I32_F32 + {InstClass::VectorFpField32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Sint32}, + // 448 = V_FREXP_MANT_F32 + {InstClass::VectorFpField32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 449 = V_CLREXCP + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 450 = V_MOVRELD_B32 + {InstClass::VectorMovRel, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 451 = V_MOVRELS_B32 + {InstClass::VectorMovRel, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 452 = V_MOVRELSD_B32 + {InstClass::VectorMovRel, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + {}, +}}; + +constexpr std::array InstructionFormatVOP1 = {{ + // 0 = V_NOP + {InstClass::VectorMisc, 
InstCategory::VectorALU, 0, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 1 = V_MOV_B32 + {InstClass::VectorRegMov, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 2 = V_READFIRSTLANE_B32 + {InstClass::VectorLane, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 3 = V_CVT_I32_F64 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Sint32}, + // 4 = V_CVT_F64_I32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, ScalarType::Float64}, + // 5 = V_CVT_F32_I32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, ScalarType::Float32}, + // 6 = V_CVT_F32_U32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float32}, + // 7 = V_CVT_U32_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Uint32}, + // 8 = V_CVT_I32_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Sint32}, + // 9 = V_MOV_FED_B32 + {InstClass::Undefined, InstCategory::Undefined, 1, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 10 = V_CVT_F16_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float16}, + // 11 = V_CVT_F32_F16 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float16, + ScalarType::Float32}, + // 12 = V_CVT_RPI_I32_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Sint32}, + // 13 = V_CVT_FLR_I32_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, ScalarType::Sint32}, + // 14 = V_CVT_OFF_F32_I4 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, ScalarType::Float32}, + // 15 = V_CVT_F32_F64 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float32}, + // 16 = V_CVT_F64_F32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float64}, + // 17 = V_CVT_F32_UBYTE0 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 18 = V_CVT_F32_UBYTE1 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 19 = V_CVT_F32_UBYTE2 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 20 = V_CVT_F32_UBYTE3 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 21 = V_CVT_U32_F64 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Float64, ScalarType::Uint32}, + // 22 = V_CVT_F64_U32 + {InstClass::VectorConv, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 32 = V_FRACT_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 33 = V_TRUNC_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 34 = V_CEIL_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 35 = V_RNDNE_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 36 = V_FLOOR_F32 + {InstClass::VectorFpRound32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 37 = 
V_EXP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 38 = V_LOG_CLAMP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 39 = V_LOG_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 40 = V_RCP_CLAMP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 41 = V_RCP_LEGACY_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 42 = V_RCP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 43 = V_RCP_IFLAG_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 44 = V_RSQ_CLAMP_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 45 = V_RSQ_LEGACY_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 46 = V_RSQ_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 47 = V_RCP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 48 = V_RCP_CLAMP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 49 = V_RSQ_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 50 = V_RSQ_CLAMP_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 51 = V_SQRT_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 52 = V_SQRT_F64 + {InstClass::VectorFpTran64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 53 = V_SIN_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 54 = V_COS_F32 + {InstClass::VectorFpTran32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 55 = V_NOT_B32 + {InstClass::VectorBitLogic, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 56 = V_BFREV_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 57 = V_FFBH_U32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 58 = V_FFBL_B32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 59 = V_FFBH_I32 + {InstClass::VectorBitField32, InstCategory::VectorALU, 1, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 60 = V_FREXP_EXP_I32_F64 + {InstClass::VectorFpField64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Sint32}, + // 61 = V_FREXP_MANT_F64 + {InstClass::VectorFpField64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 62 = V_FRACT_F64 + {InstClass::VectorFpRound64, InstCategory::VectorALU, 1, 1, ScalarType::Float64, + ScalarType::Float64}, + // 63 = V_FREXP_EXP_I32_F32 + {InstClass::VectorFpField32, InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Sint32}, + // 64 = V_FREXP_MANT_F32 + {InstClass::VectorFpField32, 
InstCategory::VectorALU, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 65 = V_CLREXCP + {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 66 = V_MOVRELD_B32 + {InstClass::VectorMovRel, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 67 = V_MOVRELS_B32 + {InstClass::VectorMovRel, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 68 = V_MOVRELSD_B32 + {InstClass::VectorMovRel, InstCategory::VectorALU, 1, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + {}, +}}; + +constexpr std::array InstructionFormatVOPC = {{ + // 0 = V_CMP_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 1 = V_CMP_LT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 2 = V_CMP_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 3 = V_CMP_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 4 = V_CMP_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 5 = V_CMP_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 6 = V_CMP_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 7 = V_CMP_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 8 = V_CMP_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 9 = V_CMP_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 10 = V_CMP_NLG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 11 = V_CMP_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 12 = V_CMP_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 13 = V_CMP_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 14 = V_CMP_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 15 = V_CMP_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 16 = V_CMPX_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 17 = V_CMPX_LT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 18 = V_CMPX_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 19 = V_CMPX_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 20 = V_CMPX_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 21 = V_CMPX_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 22 = V_CMPX_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, 
ScalarType::Float32, + ScalarType::Float32}, + // 23 = V_CMPX_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 24 = V_CMPX_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 25 = V_CMPX_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 26 = V_CMPX_NLG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 27 = V_CMPX_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 28 = V_CMPX_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 29 = V_CMPX_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 30 = V_CMPX_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 31 = V_CMPX_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 32 = V_CMP_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 33 = V_CMP_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 34 = V_CMP_EQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 35 = V_CMP_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 36 = V_CMP_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 37 = V_CMP_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 38 = V_CMP_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 39 = V_CMP_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 40 = V_CMP_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 41 = V_CMP_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 42 = V_CMP_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 43 = V_CMP_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 44 = V_CMP_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 45 = V_CMP_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 46 = V_CMP_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 47 = V_CMP_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 48 = V_CMPX_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 49 = V_CMPX_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 50 = V_CMPX_EQ_F64 + 
{InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 51 = V_CMPX_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 52 = V_CMPX_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 53 = V_CMPX_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 54 = V_CMPX_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 55 = V_CMPX_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 56 = V_CMPX_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 57 = V_CMPX_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 58 = V_CMPX_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 59 = V_CMPX_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 60 = V_CMPX_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 61 = V_CMPX_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 62 = V_CMPX_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 63 = V_CMPX_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 64 = V_CMPS_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 65 = V_CMPS_LT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 66 = V_CMPS_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 67 = V_CMPS_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 68 = V_CMPS_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 69 = V_CMPS_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 70 = V_CMPS_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 71 = V_CMPS_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 72 = V_CMPS_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 73 = V_CMPS_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 74 = V_CMPS_NLG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 75 = V_CMPS_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 76 = V_CMPS_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 77 = V_CMPS_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, 
ScalarType::Float32, + ScalarType::Float32}, + // 78 = V_CMPS_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 79 = V_CMPS_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 80 = V_CMPSX_F_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 81 = V_CMPSX_LT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 82 = V_CMPSX_EQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 83 = V_CMPSX_LE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 84 = V_CMPSX_GT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 85 = V_CMPSX_LG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 86 = V_CMPSX_GE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 87 = V_CMPSX_O_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 88 = V_CMPSX_U_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 89 = V_CMPSX_NGE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 90 = V_CMPSX_NLG_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 91 = V_CMPSX_NGT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 92 = V_CMPSX_NLE_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 93 = V_CMPSX_NEQ_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 94 = V_CMPSX_NLT_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 95 = V_CMPSX_TRU_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + // 96 = V_CMPS_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 97 = V_CMPS_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 98 = V_CMPS_EQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 99 = V_CMPS_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 100 = V_CMPS_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 101 = V_CMPS_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 102 = V_CMPS_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 103 = V_CMPS_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 104 = V_CMPS_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + 
// 105 = V_CMPS_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 106 = V_CMPS_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 107 = V_CMPS_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 108 = V_CMPS_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 109 = V_CMPS_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 110 = V_CMPS_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 111 = V_CMPS_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 112 = V_CMPSX_F_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 113 = V_CMPSX_LT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 114 = V_CMPSX_EQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 115 = V_CMPSX_LE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 116 = V_CMPSX_GT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 117 = V_CMPSX_LG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 118 = V_CMPSX_GE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 119 = V_CMPSX_O_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 120 = V_CMPSX_U_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 121 = V_CMPSX_NGE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 122 = V_CMPSX_NLG_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 123 = V_CMPSX_NGT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 124 = V_CMPSX_NLE_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 125 = V_CMPSX_NEQ_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 126 = V_CMPSX_NLT_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 127 = V_CMPSX_TRU_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + // 128 = V_CMP_F_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 129 = V_CMP_LT_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 130 = V_CMP_EQ_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 131 = V_CMP_LE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 132 = V_CMP_GT_I32 + 
{InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 133 = V_CMP_NE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 134 = V_CMP_GE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 135 = V_CMP_T_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 136 = V_CMP_CLASS_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 144 = V_CMPX_F_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 145 = V_CMPX_LT_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 146 = V_CMPX_EQ_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 147 = V_CMPX_LE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 148 = V_CMPX_GT_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 149 = V_CMPX_NE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 150 = V_CMPX_GE_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 151 = V_CMPX_T_I32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 152 = V_CMPX_CLASS_F32 + {InstClass::VectorFpCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 160 = V_CMP_F_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 161 = V_CMP_LT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 162 = V_CMP_EQ_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 163 = V_CMP_LE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 164 = V_CMP_GT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 165 = V_CMP_NE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 166 = V_CMP_GE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 167 = V_CMP_T_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 168 = V_CMP_CLASS_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 176 = V_CMPX_F_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 177 = V_CMPX_LT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 178 = V_CMPX_EQ_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 179 = V_CMPX_LE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + 
ScalarType::Sint64}, + // 180 = V_CMPX_GT_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 181 = V_CMPX_NE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 182 = V_CMPX_GE_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 183 = V_CMPX_T_I64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 184 = V_CMPX_CLASS_F64 + {InstClass::VectorFpCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Float64, + ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 192 = V_CMP_F_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 193 = V_CMP_LT_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 194 = V_CMP_EQ_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 195 = V_CMP_LE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 196 = V_CMP_GT_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 197 = V_CMP_NE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 198 = V_CMP_GE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 199 = V_CMP_T_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 208 = V_CMPX_F_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 209 = V_CMPX_LT_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 210 = V_CMPX_EQ_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 211 = V_CMPX_LE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 212 = V_CMPX_GT_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 213 = V_CMPX_NE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 214 = V_CMPX_GE_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 215 = V_CMPX_T_U32 + {InstClass::VectorIntCmp32, InstCategory::VectorALU, 2, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 224 = V_CMP_F_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 225 = V_CMP_LT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 226 = V_CMP_EQ_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 227 = V_CMP_LE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 228 = V_CMP_GT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 229 = V_CMP_NE_U64 + {InstClass::VectorIntCmp64, 
InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 230 = V_CMP_GE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 231 = V_CMP_T_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 240 = V_CMPX_F_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 241 = V_CMPX_LT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 242 = V_CMPX_EQ_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 243 = V_CMPX_LE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 244 = V_CMPX_GT_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 245 = V_CMPX_NE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 246 = V_CMPX_GE_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 247 = V_CMPX_T_U64 + {InstClass::VectorIntCmp64, InstCategory::VectorALU, 2, 1, ScalarType::Uint64, + ScalarType::Uint64}, +}}; + +constexpr std::array InstructionFormatVINTRP = {{ + // 0 = V_INTERP_P1_F32 + {InstClass::VectorInterpFpCache, InstCategory::VectorInterpolation, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 1 = V_INTERP_P2_F32 + {InstClass::VectorInterpFpCache, InstCategory::VectorInterpolation, 1, 1, ScalarType::Float32, + ScalarType::Float32}, + // 2 = V_INTERP_MOV_F32 + {InstClass::VectorInterpFpCache, InstCategory::VectorInterpolation, 1, 1, ScalarType::Float32, + ScalarType::Float32}, +}}; + +constexpr std::array InstructionFormatDS = {{ + // 0 = DS_ADD_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 1 = DS_SUB_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 2 = DS_RSUB_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 3 = DS_INC_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 4 = DS_DEC_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 5 = DS_MIN_I32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 6 = DS_MAX_I32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 7 = DS_MIN_U32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 8 = DS_MAX_U32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 9 = DS_AND_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 10 = DS_OR_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 11 = DS_XOR_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 12 = DS_MSKOR_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, 
ScalarType::Uint32, + ScalarType::Uint32}, + // 13 = DS_WRITE_B32 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 14 = DS_WRITE2_B32 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 15 = DS_WRITE2ST64_B32 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 16 = DS_CMPST_B32 + {InstClass::DsAtomicCmpSt32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 17 = DS_CMPST_F32 + {InstClass::DsAtomicCmpSt32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 18 = DS_MIN_F32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 19 = DS_MAX_F32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 20 = DS_NOP + {InstClass::DsDataShareMisc, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + {}, + {}, + // 24 = DS_GWS_SEMA_RELEASE_ALL + {InstClass::GdsSync, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 25 = DS_GWS_INIT + {InstClass::GdsSync, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 26 = DS_GWS_SEMA_V + {InstClass::GdsSync, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 27 = DS_GWS_SEMA_BR + {InstClass::GdsSync, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 28 = DS_GWS_SEMA_P + {InstClass::GdsSync, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 29 = DS_GWS_BARRIER + {InstClass::ScalarSync, InstCategory::FlowControl, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 30 = DS_WRITE_B8 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 31 = DS_WRITE_B16 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 32 = DS_ADD_RTN_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 33 = DS_SUB_RTN_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 34 = DS_RSUB_RTN_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 35 = DS_INC_RTN_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 36 = DS_DEC_RTN_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 37 = DS_MIN_RTN_I32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 38 = DS_MAX_RTN_I32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 39 = DS_MIN_RTN_U32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 40 = DS_MAX_RTN_U32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 41 = DS_AND_RTN_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 42 = DS_OR_RTN_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 43 = DS_XOR_RTN_B32 + 
{InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 44 = DS_MSKOR_RTN_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 45 = DS_WRXCHG_RTN_B32 + {InstClass::DsIdxWrXchg, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 46 = DS_WRXCHG2_RTN_B32 + {InstClass::DsIdxWrXchg, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 47 = DS_WRXCHG2ST64_RTN_B32 + {InstClass::DsIdxWrXchg, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 48 = DS_CMPST_RTN_B32 + {InstClass::DsAtomicCmpSt32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 49 = DS_CMPST_RTN_F32 + {InstClass::DsAtomicCmpSt32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 50 = DS_MIN_RTN_F32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 51 = DS_MAX_RTN_F32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 52 = DS_WRAP_RTN_B32 + {InstClass::DsIdxWrap, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 53 = DS_SWIZZLE_B32 + {InstClass::DsDataShareUt, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 54 = DS_READ_B32 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 55 = DS_READ2_B32 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 56 = DS_READ2ST64_B32 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 57 = DS_READ_I8 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 58 = DS_READ_U8 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 59 = DS_READ_I16 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Sint32, ScalarType::Sint32}, + // 60 = DS_READ_U16 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + // 61 = DS_CONSUME + {InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 62 = DS_APPEND + {InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 63 = DS_ORDERED_COUNT + {InstClass::GdsOrdCnt, InstCategory::DataShare, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 64 = DS_ADD_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 65 = DS_SUB_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 66 = DS_RSUB_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 67 = DS_INC_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 68 = DS_DEC_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 69 = DS_MIN_I64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 70 = DS_MAX_I64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 71 = DS_MIN_U64 + {InstClass::DsAtomicMinMax64, 
InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 72 = DS_MAX_U64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 73 = DS_AND_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 74 = DS_OR_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 75 = DS_XOR_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 76 = DS_MSKOR_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 77 = DS_WRITE_B64 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 78 = DS_WRITE2_B64 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 79 = DS_WRITE2ST64_B64 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 80 = DS_CMPST_B64 + {InstClass::DsAtomicCmpSt64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 81 = DS_CMPST_F64 + {InstClass::DsAtomicCmpSt64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 82 = DS_MIN_F64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 83 = DS_MAX_F64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 96 = DS_ADD_RTN_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 97 = DS_SUB_RTN_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 98 = DS_RSUB_RTN_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 99 = DS_INC_RTN_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 100 = DS_DEC_RTN_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 101 = DS_MIN_RTN_I64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 102 = DS_MAX_RTN_I64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 103 = DS_MIN_RTN_U64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 104 = DS_MAX_RTN_U64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 105 = DS_AND_RTN_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 106 = DS_OR_RTN_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 107 = DS_XOR_RTN_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 108 = DS_MSKOR_RTN_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 109 = DS_WRXCHG_RTN_B64 + {InstClass::DsIdxWrXchg, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 110 = DS_WRXCHG2_RTN_B64 + {InstClass::DsIdxWrXchg, 
InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 111 = DS_WRXCHG2ST64_RTN_B64 + {InstClass::DsIdxWrXchg, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 112 = DS_CMPST_RTN_B64 + {InstClass::DsAtomicCmpSt64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 113 = DS_CMPST_RTN_F64 + {InstClass::DsAtomicCmpSt64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 114 = DS_MIN_RTN_F64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 115 = DS_MAX_RTN_F64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + {}, + {}, + // 118 = DS_READ_B64 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 119 = DS_READ2_B64 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + // 120 = DS_READ2ST64_B64 + {InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + {}, + {}, + {}, + {}, + {}, + // 126 = DS_CONDXCHG32_RTN_B64 + {InstClass::DsIdxCondXchg, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + {}, + // 128 = DS_ADD_SRC2_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 129 = DS_SUB_SRC2_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 130 = DS_RSUB_SRC2_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 131 = DS_INC_SRC2_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 132 = DS_DEC_SRC2_U32 + {InstClass::DsAtomicArith32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 133 = DS_MIN_SRC2_I32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 134 = DS_MAX_SRC2_I32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Sint32, + ScalarType::Sint32}, + // 135 = DS_MIN_SRC2_U32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 136 = DS_MAX_SRC2_U32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 137 = DS_AND_SRC2_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 138 = DS_OR_SRC2_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 139 = DS_XOR_SRC2_B32 + {InstClass::DsAtomicLogic32, InstCategory::DataShare, 3, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + // 141 = DS_WRITE_SRC2_B32 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32}, + {}, + {}, + {}, + {}, + // 146 = DS_MIN_SRC2_F32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 147 = DS_MAX_SRC2_F32 + {InstClass::DsAtomicMinMax32, InstCategory::DataShare, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, 
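+ // Layout note: every entry in these format tables has the same six-field shape,
+ // {InstClass, InstCategory, src_operand_count, dst_operand_count, src_type,
+ // dst_type}, and the empty {} slots pad unimplemented opcodes so a table can be
+ // indexed directly by the raw opcode value. A minimal lookup sketch, assuming an
+ // InstFormat aggregate with these field names (illustrative only, not
+ // necessarily the exact API defined elsewhere in this patch):
+ //   const InstFormat& fmt = InstructionFormatDS[opcode];
+ //   inst.inst_class = fmt.inst_class;
+ //   inst.category = fmt.inst_category;
+ //   inst.src_count = fmt.src_count;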
+ {}, + // 192 = DS_ADD_SRC2_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 193 = DS_SUB_SRC2_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 194 = DS_RSUB_SRC2_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 195 = DS_INC_SRC2_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 196 = DS_DEC_SRC2_U64 + {InstClass::DsAtomicArith64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 197 = DS_MIN_SRC2_I64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 198 = DS_MAX_SRC2_I64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Sint64, + ScalarType::Sint64}, + // 199 = DS_MIN_SRC2_U64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 200 = DS_MAX_SRC2_U64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 201 = DS_AND_SRC2_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 202 = DS_OR_SRC2_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 203 = DS_XOR_SRC2_B64 + {InstClass::DsAtomicLogic64, InstCategory::DataShare, 3, 1, ScalarType::Uint64, + ScalarType::Uint64}, + {}, + // 205 = DS_WRITE_SRC2_B64 + {InstClass::DsIdxWr, InstCategory::DataShare, 3, 1, ScalarType::Uint64, ScalarType::Uint64}, + {}, + {}, + {}, + {}, + // 210 = DS_MIN_SRC2_F64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + // 211 = DS_MAX_SRC2_F64 + {InstClass::DsAtomicMinMax64, InstCategory::DataShare, 3, 1, ScalarType::Float64, + ScalarType::Float64}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 222 = DS_WRITE_B96 + {InstClass::Undefined, InstCategory::Undefined, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 223 = DS_WRITE_B128 + {InstClass::Undefined, InstCategory::Undefined, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 253 = DS_CONDXCHG32_RTN_B128 + {InstClass::Undefined, InstCategory::Undefined, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 254 = DS_READ_B96 + {InstClass::Undefined, InstCategory::Undefined, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 255 = DS_READ_B128 + {InstClass::Undefined, InstCategory::Undefined, 3, 1, ScalarType::Undefined, + ScalarType::Undefined}, +}}; + +constexpr std::array InstructionFormatMUBUF = {{ + // 0 = BUFFER_LOAD_FORMAT_X + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 1 = BUFFER_LOAD_FORMAT_XY + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 2 = BUFFER_LOAD_FORMAT_XYZ + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 3 = BUFFER_LOAD_FORMAT_XYZW + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 4 = 
BUFFER_STORE_FORMAT_X + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 5 = BUFFER_STORE_FORMAT_XY + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 6 = BUFFER_STORE_FORMAT_XYZ + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 7 = BUFFER_STORE_FORMAT_XYZW + {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 8 = BUFFER_LOAD_UBYTE + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 9 = BUFFER_LOAD_SBYTE + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 10 = BUFFER_LOAD_USHORT + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 11 = BUFFER_LOAD_SSHORT + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 12 = BUFFER_LOAD_DWORD + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 13 = BUFFER_LOAD_DWORDX2 + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 14 = BUFFER_LOAD_DWORDX4 + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 15 = BUFFER_LOAD_DWORDX3 + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 24 = BUFFER_STORE_BYTE + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + {}, + // 26 = BUFFER_STORE_SHORT + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + {}, + // 28 = BUFFER_STORE_DWORD + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 29 = BUFFER_STORE_DWORDX2 + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 30 = BUFFER_STORE_DWORDX4 + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 31 = BUFFER_STORE_DWORDX3 + {InstClass::VectorMemBufNoFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 48 = BUFFER_ATOMIC_SWAP + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 49 = BUFFER_ATOMIC_CMPSWAP + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 50 = BUFFER_ATOMIC_ADD + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 51 = BUFFER_ATOMIC_SUB + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + {}, + // 53 = BUFFER_ATOMIC_SMIN + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 54 = BUFFER_ATOMIC_UMIN + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 55 = BUFFER_ATOMIC_SMAX + 
{InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 56 = BUFFER_ATOMIC_UMAX + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 57 = BUFFER_ATOMIC_AND + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 58 = BUFFER_ATOMIC_OR + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 59 = BUFFER_ATOMIC_XOR + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 60 = BUFFER_ATOMIC_INC + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 61 = BUFFER_ATOMIC_DEC + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Uint32}, + // 62 = BUFFER_ATOMIC_FCMPSWAP + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 63 = BUFFER_ATOMIC_FMIN + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 64 = BUFFER_ATOMIC_FMAX + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 80 = BUFFER_ATOMIC_SWAP_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 81 = BUFFER_ATOMIC_CMPSWAP_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 82 = BUFFER_ATOMIC_ADD_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 83 = BUFFER_ATOMIC_SUB_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + {}, + // 85 = BUFFER_ATOMIC_SMIN_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 86 = BUFFER_ATOMIC_UMIN_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 87 = BUFFER_ATOMIC_SMAX_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 88 = BUFFER_ATOMIC_UMAX_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 89 = BUFFER_ATOMIC_AND_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 90 = BUFFER_ATOMIC_OR_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 91 = BUFFER_ATOMIC_XOR_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 92 = BUFFER_ATOMIC_INC_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 93 = BUFFER_ATOMIC_DEC_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Uint64, + ScalarType::Uint64}, + // 94 = BUFFER_ATOMIC_FCMPSWAP_X2 + {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Float64, + ScalarType::Float64}, + // 95 = BUFFER_ATOMIC_FMIN_X2 + {InstClass::VectorMemBufAtomic,
InstCategory::VectorMemory, 4, 1, ScalarType::Float64,
+     ScalarType::Float64},
+    // 96 = BUFFER_ATOMIC_FMAX_X2
+    {InstClass::VectorMemBufAtomic, InstCategory::VectorMemory, 4, 1, ScalarType::Float64,
+     ScalarType::Float64},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    // 112 = BUFFER_WBINVL1_SC
+    {InstClass::VectorMemL1Cache, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+    // 113 = BUFFER_WBINVL1
+    {InstClass::VectorMemL1Cache, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+}};
+
+constexpr std::array<InstFormat, 8> InstructionFormatMTBUF = {{
+    // 0 = TBUFFER_LOAD_FORMAT_X
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 1 = TBUFFER_LOAD_FORMAT_XY
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 2 = TBUFFER_LOAD_FORMAT_XYZ
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 3 = TBUFFER_LOAD_FORMAT_XYZW
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 4 = TBUFFER_STORE_FORMAT_X
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 5 = TBUFFER_STORE_FORMAT_XY
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 6 = TBUFFER_STORE_FORMAT_XYZ
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 7 = TBUFFER_STORE_FORMAT_XYZW
+    {InstClass::VectorMemBufFmt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+}};
+
+constexpr std::array<InstFormat, 112> InstructionFormatMIMG = {{
+    // 0 = IMAGE_LOAD
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 1 = IMAGE_LOAD_MIP
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 2 = IMAGE_LOAD_PCK
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 3 = IMAGE_LOAD_PCK_SGN
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 4 = IMAGE_LOAD_MIP_PCK
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 5 = IMAGE_LOAD_MIP_PCK_SGN
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    {},
+    {},
+    // 8 = IMAGE_STORE
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 9 = IMAGE_STORE_MIP
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 10 = IMAGE_STORE_PCK
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    // 11 = IMAGE_STORE_MIP_PCK
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Float32},
+    {},
+    {},
+    // 14 = IMAGE_GET_RESINFO
+    {InstClass::VectorMemImgUt, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32,
+     ScalarType::Uint32},
+    // 15 = IMAGE_ATOMIC_SWAP
+    {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+    // 16 = IMAGE_ATOMIC_CMPSWAP
{InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 17 = IMAGE_ATOMIC_ADD + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 18 = IMAGE_ATOMIC_SUB + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + // 20 = IMAGE_ATOMIC_SMIN + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 21 = IMAGE_ATOMIC_UMIN + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 22 = IMAGE_ATOMIC_SMAX + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 23 = IMAGE_ATOMIC_UMAX + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 24 = IMAGE_ATOMIC_AND + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 25 = IMAGE_ATOMIC_OR + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 26 = IMAGE_ATOMIC_XOR + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 27 = IMAGE_ATOMIC_INC + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 28 = IMAGE_ATOMIC_DEC + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 29 = IMAGE_ATOMIC_FCMPSWAP + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 30 = IMAGE_ATOMIC_FMIN + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 31 = IMAGE_ATOMIC_FMAX + {InstClass::VectorMemImgNoSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 32 = IMAGE_SAMPLE + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 33 = IMAGE_SAMPLE_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 34 = IMAGE_SAMPLE_D + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 35 = IMAGE_SAMPLE_D_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 36 = IMAGE_SAMPLE_L + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 37 = IMAGE_SAMPLE_B + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 38 = IMAGE_SAMPLE_B_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 39 = IMAGE_SAMPLE_LZ + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 40 = IMAGE_SAMPLE_C + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 41 = IMAGE_SAMPLE_C_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 42 = IMAGE_SAMPLE_C_D + {InstClass::VectorMemImgSmp, 
InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 43 = IMAGE_SAMPLE_C_D_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 44 = IMAGE_SAMPLE_C_L + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 45 = IMAGE_SAMPLE_C_B + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 46 = IMAGE_SAMPLE_C_B_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 47 = IMAGE_SAMPLE_C_LZ + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 48 = IMAGE_SAMPLE_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 49 = IMAGE_SAMPLE_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 50 = IMAGE_SAMPLE_D_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 51 = IMAGE_SAMPLE_D_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 52 = IMAGE_SAMPLE_L_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 53 = IMAGE_SAMPLE_B_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 54 = IMAGE_SAMPLE_B_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 55 = IMAGE_SAMPLE_LZ_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 56 = IMAGE_SAMPLE_C_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 57 = IMAGE_SAMPLE_C_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 58 = IMAGE_SAMPLE_C_D_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 59 = IMAGE_SAMPLE_C_D_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 60 = IMAGE_SAMPLE_C_L_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 61 = IMAGE_SAMPLE_C_B_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 62 = IMAGE_SAMPLE_C_B_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 63 = IMAGE_SAMPLE_C_LZ_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + // 64 = IMAGE_GATHER4 + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 65 = IMAGE_GATHER4_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + {}, + // 68 = IMAGE_GATHER4_L + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 69 = IMAGE_GATHER4_B + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + 
ScalarType::Undefined}, + // 70 = IMAGE_GATHER4_B_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 71 = IMAGE_GATHER4_LZ + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 72 = IMAGE_GATHER4_C + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 73 = IMAGE_GATHER4_C_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + {}, + // 76 = IMAGE_GATHER4_C_L + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 77 = IMAGE_GATHER4_C_B + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 78 = IMAGE_GATHER4_C_B_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 79 = IMAGE_GATHER4_C_LZ + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 80 = IMAGE_GATHER4_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 81 = IMAGE_GATHER4_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + {}, + // 84 = IMAGE_GATHER4_L_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 85 = IMAGE_GATHER4_B_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 86 = IMAGE_GATHER4_B_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 87 = IMAGE_GATHER4_LZ_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, + // 88 = IMAGE_GATHER4_C_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 89 = IMAGE_GATHER4_C_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + {}, + {}, + // 92 = IMAGE_GATHER4_C_L_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 93 = IMAGE_GATHER4_C_B_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 94 = IMAGE_GATHER4_C_B_CL_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 95 = IMAGE_GATHER4_C_LZ_O + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 96 = IMAGE_GET_LOD + {InstClass::VectorMemImgUt, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, + ScalarType::Float32}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 104 = IMAGE_SAMPLE_CD + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 105 = IMAGE_SAMPLE_CD_CL + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 106 = IMAGE_SAMPLE_C_CD + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, + ScalarType::Undefined}, + // 107 = IMAGE_SAMPLE_C_CD_CL + 
{InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+    // 108 = IMAGE_SAMPLE_CD_O
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+    // 109 = IMAGE_SAMPLE_CD_CL_O
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+    // 110 = IMAGE_SAMPLE_C_CD_O
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+    // 111 = IMAGE_SAMPLE_C_CD_CL_O
+    {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined,
+     ScalarType::Undefined},
+}};
+
+constexpr std::array<InstFormat, 1> InstructionFormatEXP = {{
+    {InstClass::Exp, InstCategory::Export, 4, 1, ScalarType::Float32, ScalarType::Any},
+}};
+
+InstFormat InstructionFormat(InstEncoding encoding, uint32_t opcode) {
+    switch (encoding) {
+    case InstEncoding::SOP1:
+        return InstructionFormatSOP1[opcode];
+    case InstEncoding::SOPP:
+        return InstructionFormatSOPP[opcode];
+    case InstEncoding::SOPC:
+        return InstructionFormatSOPC[opcode];
+    case InstEncoding::VOP1:
+        return InstructionFormatVOP1[opcode];
+    case InstEncoding::VOPC:
+        return InstructionFormatVOPC[opcode];
+    case InstEncoding::VOP3:
+        return InstructionFormatVOP3[opcode];
+    case InstEncoding::EXP:
+        return InstructionFormatEXP[opcode];
+    case InstEncoding::VINTRP:
+        return InstructionFormatVINTRP[opcode];
+    case InstEncoding::DS:
+        return InstructionFormatDS[opcode];
+    case InstEncoding::MUBUF:
+        return InstructionFormatMUBUF[opcode];
+    case InstEncoding::MTBUF:
+        return InstructionFormatMTBUF[opcode];
+    case InstEncoding::MIMG:
+        return InstructionFormatMIMG[opcode];
+    case InstEncoding::SMRD:
+        return InstructionFormatSMRD[opcode];
+    case InstEncoding::SOPK:
+        return InstructionFormatSOPK[opcode];
+    case InstEncoding::SOP2:
+        return InstructionFormatSOP2[opcode];
+    case InstEncoding::VOP2:
+        return InstructionFormatVOP2[opcode];
+    }
+    UNREACHABLE();
+    return {};
+}
+
+} // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/instruction.cpp b/src/shader_recompiler/frontend/instruction.cpp
new file mode 100644
index 00000000..d4847708
--- /dev/null
+++ b/src/shader_recompiler/frontend/instruction.cpp
@@ -0,0 +1,50 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/assert.h"
+#include "shader_recompiler/frontend/instruction.h"
+
+namespace Shader::Gcn {
+
+u32 GcnInst::BranchTarget(u32 pc) const {
+    // The SOPP immediate is a signed offset in words, relative to the
+    // instruction that follows the branch (hence the scale by 4 and the
+    // trailing + 4).
+    const s16 simm = static_cast<s16>(control.sopp.simm * 4);
+    const u32 target = pc + simm + 4;
+    return target;
+}
+
+bool GcnInst::IsTerminateInstruction() const {
+    return IsUnconditionalBranch() || IsConditionalBranch() || IsFork() ||
+           opcode == Opcode::S_ENDPGM;
+}
+
+bool GcnInst::IsUnconditionalBranch() const {
+    return opcode == Opcode::S_BRANCH;
+}
+
+bool GcnInst::IsFork() const {
+    return opcode == Opcode::S_CBRANCH_I_FORK || opcode == Opcode::S_CBRANCH_G_FORK ||
+           opcode == Opcode::S_CBRANCH_JOIN;
+}
+
+bool GcnInst::IsConditionalBranch() const {
+    switch (opcode) {
+    case Opcode::S_CBRANCH_SCC0:
+    case Opcode::S_CBRANCH_SCC1:
+    case Opcode::S_CBRANCH_VCCZ:
+    case Opcode::S_CBRANCH_VCCNZ:
+    case Opcode::S_CBRANCH_EXECZ:
+    case Opcode::S_CBRANCH_EXECNZ:
+        return true;
+    case Opcode::S_CBRANCH_CDBGSYS:
+    case Opcode::S_CBRANCH_CDBGUSER:
+    case Opcode::S_CBRANCH_CDBGSYS_OR_USER:
+    case Opcode::S_CBRANCH_CDBGSYS_AND_USER:
+        UNIMPLEMENTED();
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
+
+} // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h
new file mode 100644
index 00000000..22c2146c
--- /dev/null
+++ b/src/shader_recompiler/frontend/instruction.h
@@ -0,0 +1,208 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include "common/bit_field.h"
+#include "shader_recompiler/frontend/opcodes.h"
+
+namespace Shader::Gcn {
+
+constexpr u32 GcnMaxSrcCount = 4;
+constexpr u32 GcnMaxDstCount = 2;
+
+enum OperandFieldRange {
+    ScalarGPRMin = 0,
+    ScalarGPRMax = 103,
+    SignedConstIntPosMin = 129,
+    SignedConstIntPosMax = 192,
+    SignedConstIntNegMin = 193,
+    SignedConstIntNegMax = 208,
+    ConstFloatMin = 240,
+    VectorGPRMin = 256,
+    VectorGPRMax = 511
+};
+
+/// These are applied after loading an operand register.
+struct InputModifiers {
+    bool neg = false;
+    bool abs = false;
+};
+
+/// These are applied before storing an operand register.
+struct OutputModifiers {
+    bool clamp = false;
+    float multiplier = std::numeric_limits<float>::quiet_NaN();
+};
+
+struct InstOperand {
+    OperandField field = OperandField::Undefined;
+    ScalarType type = ScalarType::Undefined;
+    InputModifiers input_modifier = {};
+    OutputModifiers output_modifier = {};
+    u32 code = 0xFFFFFFFF;
+};
+
+struct Operand {
+    OperandField field = OperandField::Undefined;
+    ScalarType type = ScalarType::Undefined;
+    union {
+        InputModifiers input_modifier = {};
+        OutputModifiers output_modifier;
+    };
+    u32 code = 0xFFFFFFFF;
+};
+
+struct InstSOPK {
+    u16 simm;
+};
+
+struct InstSOPP {
+    u16 simm;
+};
+
+struct InstVOP3 {
+    Operand vdst;
+    Operand src0;
+    Operand src1;
+    Operand src2;
+};
+
+struct SMRD {
+    u8 offset;
+    bool imm;
+    u8 sbase;
+};
+
+struct InstControlSOPK {
+    BitField<0, 16, u32> simm;
+};
+
+struct InstControlSOPP {
+    BitField<0, 16, u32> simm;
+};
+
+struct InstControlVOP3 {
+    u64 : 8;
+    u64 abs : 3;
+    u64 clmp : 1;
+    u64 : 47;
+    u64 omod : 2;
+    u64 neg : 3;
+};
+
+struct InstControlSMRD {
+    u32 offset : 8;
+    u32 imm : 1;
+    u32 count : 5;
+    u32 : 18;
+};
+
+struct InstControlMUBUF {
+    u64 offset : 12;
+    u64 offen : 1;
+    u64 idxen : 1;
+    u64 glc : 1;
+    u64 : 1;
+    u64 lds : 1;
+    u64 : 37;
+    u64 slc : 1;
+    u64 tfe : 1;
+    u64 count : 3;
+    u64 size : 5;
+};
+
+struct InstControlMTBUF {
+    u64 offset : 12;
+    u64 offen : 1;
+    u64 idxen : 1;
+    u64 glc : 1;
+    u64 : 4;
+    u64 dfmt : 4;
+    u64 nfmt : 3;
+    u64 : 28;
+    u64 slc : 1;
+    u64 tfe : 1;
+    u64 count : 3;
+    u64 size : 5;
+};
+
+struct InstControlMIMG {
+    u64 : 8;
+    u64 dmask : 4;
+    u64 unrm : 1;
+    u64 glc : 1;
+    u64 da : 1;
+    u64 r128 : 1;
+    u64 tfe : 1;
+    u64 lwe : 1;
+    u64 : 7;
+    u64 slc : 1;
+    u64 mod : 32;
+    u64 : 6;
+};
+
+struct InstControlDS {
+    u64 offset0 : 8;
+    u64 offset1 : 8;
+    u64 : 1;
+    u64 gds : 1;
+    u64 dual : 1;
+    u64 sign : 1;
+    u64 relative : 1;
+    u64 stride : 1;
+    u64 size : 4;
+    u64 : 38;
+};
+
+struct InstControlVINTRP {
+    u32 : 8;
+    u32 chan : 2;
+    u32 attr : 6;
+    u32 : 16;
+};
+
+struct InstControlEXP {
+    u64 en : 4;
+    u64 target : 6;
+    u64 compr : 1;
+    u64 done : 1;
+    u64 vm : 1;
+    u64 reserved : 51;
+};
+
+union InstControl {
+    InstControlSOPK sopk;
+    InstControlSOPP sopp;
+    InstControlVOP3 vop3;
+    InstControlSMRD smrd;
+    InstControlMUBUF mubuf;
+    InstControlMTBUF mtbuf;
+    InstControlMIMG mimg;
+    InstControlDS ds;
+    InstControlVINTRP vintrp;
+    InstControlEXP exp;
+};
+
+struct GcnInst {
+    Opcode opcode;
+    InstEncoding encoding;
+    InstClass inst_class;
+    InstCategory category;
+    InstControl control;
+    u32 length;
+    u32 src_count;
+    u32 dst_count;
+    std::array<InstOperand, GcnMaxSrcCount> src;
+    std::array<InstOperand, GcnMaxDstCount> dst;
+
+    u32 BranchTarget(u32 pc) const;
+
+    bool IsTerminateInstruction() const;
+    bool IsUnconditionalBranch() const;
+    bool IsConditionalBranch() const;
+    bool IsFork() const;
+};
+
+} // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/module.h b/src/shader_recompiler/frontend/module.h
new file mode 100644
index 00000000..3901f021
--- /dev/null
+++ b/src/shader_recompiler/frontend/module.h
@@ -0,0 +1,10 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+namespace Shader::Gcn {
+
+void Translate();
+
+} // namespace Shader::Gcn
\ No newline at end of file
diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h
new file mode 100644
index 00000000..d38140d8
--- /dev/null
+++ b/src/shader_recompiler/frontend/opcodes.h
@@ -0,0 +1,2494 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/enum.h"
+#include "common/types.h"
+
+namespace Shader::Gcn {
+
+enum class OpcodeSOP2 : u32 {
+    S_ADD_U32 = 0,
+    S_SUB_U32 = 1,
+    S_ADD_I32 = 2,
+    S_SUB_I32 = 3,
+    S_ADDC_U32 = 4,
+    S_SUBB_U32 = 5,
+    S_MIN_I32 = 6,
+    S_MIN_U32 = 7,
+    S_MAX_I32 = 8,
+    S_MAX_U32 = 9,
+    S_CSELECT_B32 = 10,
+    S_CSELECT_B64 = 11,
+    S_AND_B32 = 14,
+    S_AND_B64 = 15,
+    S_OR_B32 = 16,
+    S_OR_B64 = 17,
+    S_XOR_B32 = 18,
+    S_XOR_B64 = 19,
+    S_ANDN2_B32 = 20,
+    S_ANDN2_B64 = 21,
+    S_ORN2_B32 = 22,
+    S_ORN2_B64 = 23,
+    S_NAND_B32 = 24,
+    S_NAND_B64 = 25,
+    S_NOR_B32 = 26,
+    S_NOR_B64 = 27,
+    S_XNOR_B32 = 28,
+    S_XNOR_B64 = 29,
+    S_LSHL_B32 = 30,
+    S_LSHL_B64 = 31,
+    S_LSHR_B32 = 32,
+    S_LSHR_B64 = 33,
+    S_ASHR_I32 = 34,
+    S_ASHR_I64 = 35,
+    S_BFM_B32 = 36,
+    S_BFM_B64 = 37,
+    S_MUL_I32 = 38,
+    S_BFE_U32 = 39,
+    S_BFE_I32 = 40,
+    S_BFE_U64 = 41,
+    S_BFE_I64 = 42,
+    S_CBRANCH_G_FORK = 43,
+    S_ABSDIFF_I32 = 44,
+
+    OP_RANGE_SOP2 = S_ABSDIFF_I32 + 1,
+};
+
+enum class OpcodeSOPK : u32 {
+    S_MOVK_I32 = 0,
+    S_CMOVK_I32 = 2,
+    S_CMPK_EQ_I32 = 3,
+    S_CMPK_LG_I32 = 4,
+    S_CMPK_GT_I32 = 5,
+    S_CMPK_GE_I32 = 6,
+    S_CMPK_LT_I32 = 7,
+    S_CMPK_LE_I32 = 8,
+    S_CMPK_EQ_U32 = 9,
+    S_CMPK_LG_U32 = 10,
+    S_CMPK_GT_U32 = 11,
+    S_CMPK_GE_U32 = 12,
+    S_CMPK_LT_U32 = 13,
+    S_CMPK_LE_U32 = 14,
+    S_ADDK_I32 = 15,
+    S_MULK_I32 = 16,
+    S_CBRANCH_I_FORK = 17,
+    S_GETREG_B32 = 18,
+    S_SETREG_B32 = 19,
+    S_GETREG_REGRD_B32 = 20,
+    S_SETREG_IMM32_B32 = 21,
+
+    OP_RANGE_SOPK = S_SETREG_IMM32_B32 + 1,
+};
+
+enum class OpcodeSOP1 : u32 {
+    S_MOV_B32 = 3,
+    S_MOV_B64 = 4,
+    S_CMOV_B32 = 5,
+    S_CMOV_B64 = 6,
+    S_NOT_B32 = 7,
+    S_NOT_B64 = 8,
+    S_WQM_B32 = 9,
+    S_WQM_B64 = 10,
+    S_BREV_B32 = 11,
+    S_BREV_B64 = 12,
+    S_BCNT0_I32_B32 = 13,
+    S_BCNT0_I32_B64 = 14,
+    S_BCNT1_I32_B32 = 15,
+    S_BCNT1_I32_B64 = 16,
+    S_FF0_I32_B32 = 17,
+    S_FF0_I32_B64 = 18,
+    S_FF1_I32_B32 = 19,
+    S_FF1_I32_B64 = 20,
+    S_FLBIT_I32_B32 = 21,
+    S_FLBIT_I32_B64 = 22,
+    S_FLBIT_I32 = 23,
+    S_FLBIT_I32_I64 = 24,
+    S_SEXT_I32_I8 = 25,
+    S_SEXT_I32_I16 = 26,
+    S_BITSET0_B32 = 27,
+    S_BITSET0_B64 = 28,
+    S_BITSET1_B32 = 29,
+    S_BITSET1_B64 = 30,
+    S_GETPC_B64 = 31,
+    S_SETPC_B64 = 32,
+    S_SWAPPC_B64 = 33,
+    S_RFE_B64 = 34,
+    S_AND_SAVEEXEC_B64 = 36,
+    S_OR_SAVEEXEC_B64 = 37,
+    S_XOR_SAVEEXEC_B64 = 38,
+    S_ANDN2_SAVEEXEC_B64 = 39,
+    S_ORN2_SAVEEXEC_B64 = 40,
+    S_NAND_SAVEEXEC_B64 = 41,
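+    // In the GCN ISA, the S_*_SAVEEXEC_B64 group first saves the current EXEC
+    // mask into SDST and then sets EXEC = SSRC0 <op> EXEC; compiled shaders
+    // use these to open divergent control-flow regions.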
S_NOR_SAVEEXEC_B64 = 42, + S_XNOR_SAVEEXEC_B64 = 43, + S_QUADMASK_B32 = 44, + S_QUADMASK_B64 = 45, + S_MOVRELS_B32 = 46, + S_MOVRELS_B64 = 47, + S_MOVRELD_B32 = 48, + S_MOVRELD_B64 = 49, + S_CBRANCH_JOIN = 50, + S_MOV_REGRD_B32 = 51, + S_ABS_I32 = 52, + S_MOV_FED_B32 = 53, + + OP_RANGE_SOP1 = S_MOV_FED_B32 + 1, +}; + +enum class OpcodeSOPC : u32 { + S_CMP_EQ_I32 = 0, + S_CMP_LG_I32 = 1, + S_CMP_GT_I32 = 2, + S_CMP_GE_I32 = 3, + S_CMP_LT_I32 = 4, + S_CMP_LE_I32 = 5, + S_CMP_EQ_U32 = 6, + S_CMP_LG_U32 = 7, + S_CMP_GT_U32 = 8, + S_CMP_GE_U32 = 9, + S_CMP_LT_U32 = 10, + S_CMP_LE_U32 = 11, + S_BITCMP0_B32 = 12, + S_BITCMP1_B32 = 13, + S_BITCMP0_B64 = 14, + S_BITCMP1_B64 = 15, + S_SETVSKIP = 16, + + OP_RANGE_SOPC = S_SETVSKIP + 1, +}; + +enum class OpcodeSOPP : u32 { + S_NOP = 0, + S_ENDPGM = 1, + S_BRANCH = 2, + S_CBRANCH_SCC0 = 4, + S_CBRANCH_SCC1 = 5, + S_CBRANCH_VCCZ = 6, + S_CBRANCH_VCCNZ = 7, + S_CBRANCH_EXECZ = 8, + S_CBRANCH_EXECNZ = 9, + S_BARRIER = 10, + S_SETKILL = 11, + S_WAITCNT = 12, + S_SETHALT = 13, + S_SLEEP = 14, + S_SETPRIO = 15, + S_SENDMSG = 16, + S_SENDMSGHALT = 17, + S_TRAP = 18, + S_ICACHE_INV = 19, + S_INCPERFLEVEL = 20, + S_DECPERFLEVEL = 21, + S_TTRACEDATA = 22, + S_CBRANCH_CDBGSYS = 23, + S_CBRANCH_CDBGUSER = 24, + S_CBRANCH_CDBGSYS_OR_USER = 25, + S_CBRANCH_CDBGSYS_AND_USER = 26, + + OP_RANGE_SOPP = S_CBRANCH_CDBGSYS_AND_USER + 1, +}; + +enum class OpcodeSMRD : u32 { + S_LOAD_DWORD = 0, + S_LOAD_DWORDX2 = 1, + S_LOAD_DWORDX4 = 2, + S_LOAD_DWORDX8 = 3, + S_LOAD_DWORDX16 = 4, + S_BUFFER_LOAD_DWORD = 8, + S_BUFFER_LOAD_DWORDX2 = 9, + S_BUFFER_LOAD_DWORDX4 = 10, + S_BUFFER_LOAD_DWORDX8 = 11, + S_BUFFER_LOAD_DWORDX16 = 12, + S_DCACHE_INV_VOL = 29, + S_MEMTIME = 30, + S_DCACHE_INV = 31, + + OP_RANGE_SMRD = S_DCACHE_INV + 1, +}; + +enum class OpcodeVOP2 : u32 { + V_CNDMASK_B32 = 0, + V_READLANE_B32 = 1, + V_WRITELANE_B32 = 2, + V_ADD_F32 = 3, + V_SUB_F32 = 4, + V_SUBREV_F32 = 5, + V_MAC_LEGACY_F32 = 6, + V_MUL_LEGACY_F32 = 7, + V_MUL_F32 = 8, + V_MUL_I32_I24 = 9, + V_MUL_HI_I32_I24 = 10, + V_MUL_U32_U24 = 11, + V_MUL_HI_U32_U24 = 12, + V_MIN_LEGACY_F32 = 13, + V_MAX_LEGACY_F32 = 14, + V_MIN_F32 = 15, + V_MAX_F32 = 16, + V_MIN_I32 = 17, + V_MAX_I32 = 18, + V_MIN_U32 = 19, + V_MAX_U32 = 20, + V_LSHR_B32 = 21, + V_LSHRREV_B32 = 22, + V_ASHR_I32 = 23, + V_ASHRREV_I32 = 24, + V_LSHL_B32 = 25, + V_LSHLREV_B32 = 26, + V_AND_B32 = 27, + V_OR_B32 = 28, + V_XOR_B32 = 29, + V_BFM_B32 = 30, + V_MAC_F32 = 31, + V_MADMK_F32 = 32, + V_MADAK_F32 = 33, + V_BCNT_U32_B32 = 34, + V_MBCNT_LO_U32_B32 = 35, + V_MBCNT_HI_U32_B32 = 36, + V_ADD_I32 = 37, + V_SUB_I32 = 38, + V_SUBREV_I32 = 39, + V_ADDC_U32 = 40, + V_SUBB_U32 = 41, + V_SUBBREV_U32 = 42, + V_LDEXP_F32 = 43, + V_CVT_PKACCUM_U8_F32 = 44, + V_CVT_PKNORM_I16_F32 = 45, + V_CVT_PKNORM_U16_F32 = 46, + V_CVT_PKRTZ_F16_F32 = 47, + V_CVT_PK_U16_U32 = 48, + V_CVT_PK_I16_I32 = 49, + + OP_RANGE_VOP2 = V_CVT_PK_I16_I32 + 1, +}; + +enum class OpcodeVOP3 : u32 { + V_CMP_F_F32 = 0, + V_CMP_LT_F32 = 1, + V_CMP_EQ_F32 = 2, + V_CMP_LE_F32 = 3, + V_CMP_GT_F32 = 4, + V_CMP_LG_F32 = 5, + V_CMP_GE_F32 = 6, + V_CMP_O_F32 = 7, + V_CMP_U_F32 = 8, + V_CMP_NGE_F32 = 9, + V_CMP_NLG_F32 = 10, + V_CMP_NGT_F32 = 11, + V_CMP_NLE_F32 = 12, + V_CMP_NEQ_F32 = 13, + V_CMP_NLT_F32 = 14, + V_CMP_TRU_F32 = 15, + V_CMP_T_F32 = 15, + V_CMPX_F_F32 = 16, + V_CMPX_LT_F32 = 17, + V_CMPX_EQ_F32 = 18, + V_CMPX_LE_F32 = 19, + V_CMPX_GT_F32 = 20, + V_CMPX_LG_F32 = 21, + V_CMPX_GE_F32 = 22, + V_CMPX_O_F32 = 23, + V_CMPX_U_F32 = 24, + V_CMPX_NGE_F32 = 25, + V_CMPX_NLG_F32 = 26, + 
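+    // VOP3 encodes the other VALU groups at fixed bases: VOPC compares start
+    // at 0 (the V_CMPX_* forms also write EXEC), VOP2 ops at 256 and VOP1 ops
+    // at 384, matching OpMapVOP3VOPX below; 320-375 hold the VOP3-only
+    // three-source ops such as V_MAD_F32.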
V_CMPX_NGT_F32 = 27, + V_CMPX_NLE_F32 = 28, + V_CMPX_NEQ_F32 = 29, + V_CMPX_NLT_F32 = 30, + V_CMPX_TRU_F32 = 31, + V_CMPX_T_F32 = 31, + V_CMP_F_F64 = 32, + V_CMP_LT_F64 = 33, + V_CMP_EQ_F64 = 34, + V_CMP_LE_F64 = 35, + V_CMP_GT_F64 = 36, + V_CMP_LG_F64 = 37, + V_CMP_GE_F64 = 38, + V_CMP_O_F64 = 39, + V_CMP_U_F64 = 40, + V_CMP_NGE_F64 = 41, + V_CMP_NLG_F64 = 42, + V_CMP_NGT_F64 = 43, + V_CMP_NLE_F64 = 44, + V_CMP_NEQ_F64 = 45, + V_CMP_NLT_F64 = 46, + V_CMP_TRU_F64 = 47, + V_CMP_T_F64 = 47, + V_CMPX_F_F64 = 48, + V_CMPX_LT_F64 = 49, + V_CMPX_EQ_F64 = 50, + V_CMPX_LE_F64 = 51, + V_CMPX_GT_F64 = 52, + V_CMPX_LG_F64 = 53, + V_CMPX_GE_F64 = 54, + V_CMPX_O_F64 = 55, + V_CMPX_U_F64 = 56, + V_CMPX_NGE_F64 = 57, + V_CMPX_NLG_F64 = 58, + V_CMPX_NGT_F64 = 59, + V_CMPX_NLE_F64 = 60, + V_CMPX_NEQ_F64 = 61, + V_CMPX_NLT_F64 = 62, + V_CMPX_TRU_F64 = 63, + V_CMPX_T_F64 = 63, + V_CMPS_F_F32 = 64, + V_CMPS_LT_F32 = 65, + V_CMPS_EQ_F32 = 66, + V_CMPS_LE_F32 = 67, + V_CMPS_GT_F32 = 68, + V_CMPS_LG_F32 = 69, + V_CMPS_GE_F32 = 70, + V_CMPS_O_F32 = 71, + V_CMPS_U_F32 = 72, + V_CMPS_NGE_F32 = 73, + V_CMPS_NLG_F32 = 74, + V_CMPS_NGT_F32 = 75, + V_CMPS_NLE_F32 = 76, + V_CMPS_NEQ_F32 = 77, + V_CMPS_NLT_F32 = 78, + V_CMPS_TRU_F32 = 79, + V_CMPS_T_F32 = 79, + V_CMPSX_F_F32 = 80, + V_CMPSX_LT_F32 = 81, + V_CMPSX_EQ_F32 = 82, + V_CMPSX_LE_F32 = 83, + V_CMPSX_GT_F32 = 84, + V_CMPSX_LG_F32 = 85, + V_CMPSX_GE_F32 = 86, + V_CMPSX_O_F32 = 87, + V_CMPSX_U_F32 = 88, + V_CMPSX_NGE_F32 = 89, + V_CMPSX_NLG_F32 = 90, + V_CMPSX_NGT_F32 = 91, + V_CMPSX_NLE_F32 = 92, + V_CMPSX_NEQ_F32 = 93, + V_CMPSX_NLT_F32 = 94, + V_CMPSX_TRU_F32 = 95, + V_CMPSX_T_F32 = 95, + V_CMPS_F_F64 = 96, + V_CMPS_LT_F64 = 97, + V_CMPS_EQ_F64 = 98, + V_CMPS_LE_F64 = 99, + V_CMPS_GT_F64 = 100, + V_CMPS_LG_F64 = 101, + V_CMPS_GE_F64 = 102, + V_CMPS_O_F64 = 103, + V_CMPS_U_F64 = 104, + V_CMPS_NGE_F64 = 105, + V_CMPS_NLG_F64 = 106, + V_CMPS_NGT_F64 = 107, + V_CMPS_NLE_F64 = 108, + V_CMPS_NEQ_F64 = 109, + V_CMPS_NLT_F64 = 110, + V_CMPS_TRU_F64 = 111, + V_CMPS_T_F64 = 111, + V_CMPSX_F_F64 = 112, + V_CMPSX_LT_F64 = 113, + V_CMPSX_EQ_F64 = 114, + V_CMPSX_LE_F64 = 115, + V_CMPSX_GT_F64 = 116, + V_CMPSX_LG_F64 = 117, + V_CMPSX_GE_F64 = 118, + V_CMPSX_O_F64 = 119, + V_CMPSX_U_F64 = 120, + V_CMPSX_NGE_F64 = 121, + V_CMPSX_NLG_F64 = 122, + V_CMPSX_NGT_F64 = 123, + V_CMPSX_NLE_F64 = 124, + V_CMPSX_NEQ_F64 = 125, + V_CMPSX_NLT_F64 = 126, + V_CMPSX_TRU_F64 = 127, + V_CMPSX_T_F64 = 127, + V_CMP_F_I32 = 128, + V_CMP_LT_I32 = 129, + V_CMP_EQ_I32 = 130, + V_CMP_LE_I32 = 131, + V_CMP_GT_I32 = 132, + V_CMP_LG_I32 = 133, + V_CMP_NE_I32 = 133, + V_CMP_GE_I32 = 134, + V_CMP_TRU_I32 = 135, + V_CMP_T_I32 = 135, + V_CMP_CLASS_F32 = 136, + V_CMPX_F_I32 = 144, + V_CMPX_LT_I32 = 145, + V_CMPX_EQ_I32 = 146, + V_CMPX_LE_I32 = 147, + V_CMPX_GT_I32 = 148, + V_CMPX_LG_I32 = 149, + V_CMPX_NE_I32 = 149, + V_CMPX_GE_I32 = 150, + V_CMPX_TRU_I32 = 151, + V_CMPX_T_I32 = 151, + V_CMPX_CLASS_F32 = 152, + V_CMP_F_I64 = 160, + V_CMP_LT_I64 = 161, + V_CMP_EQ_I64 = 162, + V_CMP_LE_I64 = 163, + V_CMP_GT_I64 = 164, + V_CMP_LG_I64 = 165, + V_CMP_NE_I64 = 165, + V_CMP_GE_I64 = 166, + V_CMP_TRU_I64 = 167, + V_CMP_T_I64 = 167, + V_CMP_CLASS_F64 = 168, + V_CMPX_F_I64 = 176, + V_CMPX_LT_I64 = 177, + V_CMPX_EQ_I64 = 178, + V_CMPX_LE_I64 = 179, + V_CMPX_GT_I64 = 180, + V_CMPX_LG_I64 = 181, + V_CMPX_NE_I64 = 181, + V_CMPX_GE_I64 = 182, + V_CMPX_TRU_I64 = 183, + V_CMPX_T_I64 = 183, + V_CMPX_CLASS_F64 = 184, + V_CMP_F_U32 = 192, + V_CMP_LT_U32 = 193, + V_CMP_EQ_U32 = 194, + V_CMP_LE_U32 = 195, + V_CMP_GT_U32 = 196, + 
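+    // Pairs such as V_CMP_LG_*/V_CMP_NE_* and V_CMP_TRU_*/V_CMP_T_* are
+    // intentional aliases of one hardware opcode, hence the duplicated
+    // enumerator values.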
V_CMP_LG_U32 = 197, + V_CMP_NE_U32 = 197, + V_CMP_GE_U32 = 198, + V_CMP_TRU_U32 = 199, + V_CMP_T_U32 = 199, + V_CMPX_F_U32 = 208, + V_CMPX_LT_U32 = 209, + V_CMPX_EQ_U32 = 210, + V_CMPX_LE_U32 = 211, + V_CMPX_GT_U32 = 212, + V_CMPX_LG_U32 = 213, + V_CMPX_NE_U32 = 213, + V_CMPX_GE_U32 = 214, + V_CMPX_TRU_U32 = 215, + V_CMPX_T_U32 = 215, + V_CMP_F_U64 = 224, + V_CMP_LT_U64 = 225, + V_CMP_EQ_U64 = 226, + V_CMP_LE_U64 = 227, + V_CMP_GT_U64 = 228, + V_CMP_LG_U64 = 229, + V_CMP_NE_U64 = 229, + V_CMP_GE_U64 = 230, + V_CMP_TRU_U64 = 231, + V_CMP_T_U64 = 231, + V_CMPX_F_U64 = 240, + V_CMPX_LT_U64 = 241, + V_CMPX_EQ_U64 = 242, + V_CMPX_LE_U64 = 243, + V_CMPX_GT_U64 = 244, + V_CMPX_LG_U64 = 245, + V_CMPX_NE_U64 = 245, + V_CMPX_GE_U64 = 246, + V_CMPX_TRU_U64 = 247, + V_CMPX_T_U64 = 247, + V_CNDMASK_B32 = 256, + V_READLANE_B32 = 257, + V_WRITELANE_B32 = 258, + V_ADD_F32 = 259, + V_SUB_F32 = 260, + V_SUBREV_F32 = 261, + V_MAC_LEGACY_F32 = 262, + V_MUL_LEGACY_F32 = 263, + V_MUL_F32 = 264, + V_MUL_I32_I24 = 265, + V_MUL_HI_I32_I24 = 266, + V_MUL_U32_U24 = 267, + V_MUL_HI_U32_U24 = 268, + V_MIN_LEGACY_F32 = 269, + V_MAX_LEGACY_F32 = 270, + V_MIN_F32 = 271, + V_MAX_F32 = 272, + V_MIN_I32 = 273, + V_MAX_I32 = 274, + V_MIN_U32 = 275, + V_MAX_U32 = 276, + V_LSHR_B32 = 277, + V_LSHRREV_B32 = 278, + V_ASHR_I32 = 279, + V_ASHRREV_I32 = 280, + V_LSHL_B32 = 281, + V_LSHLREV_B32 = 282, + V_AND_B32 = 283, + V_OR_B32 = 284, + V_XOR_B32 = 285, + V_BFM_B32 = 286, + V_MAC_F32 = 287, + V_MADMK_F32 = 288, + V_MADAK_F32 = 289, + V_BCNT_U32_B32 = 290, + V_MBCNT_LO_U32_B32 = 291, + V_MBCNT_HI_U32_B32 = 292, + V_ADD_I32 = 293, + V_SUB_I32 = 294, + V_SUBREV_I32 = 295, + V_ADDC_U32 = 296, + V_SUBB_U32 = 297, + V_SUBBREV_U32 = 298, + V_LDEXP_F32 = 299, + V_CVT_PKACCUM_U8_F32 = 300, + V_CVT_PKNORM_I16_F32 = 301, + V_CVT_PKNORM_U16_F32 = 302, + V_CVT_PKRTZ_F16_F32 = 303, + V_CVT_PK_U16_U32 = 304, + V_CVT_PK_I16_I32 = 305, + V_MAD_LEGACY_F32 = 320, + V_MAD_F32 = 321, + V_MAD_I32_I24 = 322, + V_MAD_U32_U24 = 323, + V_CUBEID_F32 = 324, + V_CUBESC_F32 = 325, + V_CUBETC_F32 = 326, + V_CUBEMA_F32 = 327, + V_BFE_U32 = 328, + V_BFE_I32 = 329, + V_BFI_B32 = 330, + V_FMA_F32 = 331, + V_FMA_F64 = 332, + V_LERP_U8 = 333, + V_ALIGNBIT_B32 = 334, + V_ALIGNBYTE_B32 = 335, + V_MULLIT_F32 = 336, + V_MIN3_F32 = 337, + V_MIN3_I32 = 338, + V_MIN3_U32 = 339, + V_MAX3_F32 = 340, + V_MAX3_I32 = 341, + V_MAX3_U32 = 342, + V_MED3_F32 = 343, + V_MED3_I32 = 344, + V_MED3_U32 = 345, + V_SAD_U8 = 346, + V_SAD_HI_U8 = 347, + V_SAD_U16 = 348, + V_SAD_U32 = 349, + V_CVT_PK_U8_F32 = 350, + V_DIV_FIXUP_F32 = 351, + V_DIV_FIXUP_F64 = 352, + V_LSHL_B64 = 353, + V_LSHR_B64 = 354, + V_ASHR_I64 = 355, + V_ADD_F64 = 356, + V_MUL_F64 = 357, + V_MIN_F64 = 358, + V_MAX_F64 = 359, + V_LDEXP_F64 = 360, + V_MUL_LO_U32 = 361, + V_MUL_HI_U32 = 362, + V_MUL_LO_I32 = 363, + V_MUL_HI_I32 = 364, + V_DIV_SCALE_F32 = 365, + V_DIV_SCALE_F64 = 366, + V_DIV_FMAS_F32 = 367, + V_DIV_FMAS_F64 = 368, + V_MSAD_U8 = 369, + V_QSAD_U8 = 370, + V_QSAD_PK_U16_U8 = 370, + V_MQSAD_U8 = 371, + V_MQSAD_PK_U16_U8 = 371, + V_TRIG_PREOP_F64 = 372, + V_MQSAD_U32_U8 = 373, + V_MAD_U64_U32 = 374, + V_MAD_I64_I32 = 375, + V_NOP = 384, + V_MOV_B32 = 385, + V_READFIRSTLANE_B32 = 386, + V_CVT_I32_F64 = 387, + V_CVT_F64_I32 = 388, + V_CVT_F32_I32 = 389, + V_CVT_F32_U32 = 390, + V_CVT_U32_F32 = 391, + V_CVT_I32_F32 = 392, + V_MOV_FED_B32 = 393, + V_CVT_F16_F32 = 394, + V_CVT_F32_F16 = 395, + V_CVT_RPI_I32_F32 = 396, + V_CVT_FLR_I32_F32 = 397, + V_CVT_OFF_F32_I4 = 398, + V_CVT_F32_F64 = 399, + V_CVT_F64_F32 = 400, 
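+    // V_CVT_F32_UBYTE0-3 convert byte 0-3 of the source dword to float,
+    // typically used to unpack packed 8-bit color channels.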
+ V_CVT_F32_UBYTE0 = 401, + V_CVT_F32_UBYTE1 = 402, + V_CVT_F32_UBYTE2 = 403, + V_CVT_F32_UBYTE3 = 404, + V_CVT_U32_F64 = 405, + V_CVT_F64_U32 = 406, + V_TRUNC_F64 = 407, + V_CEIL_F64 = 408, + V_RNDNE_F64 = 409, + V_FLOOR_F64 = 410, + V_FRACT_F32 = 416, + V_TRUNC_F32 = 417, + V_CEIL_F32 = 418, + V_RNDNE_F32 = 419, + V_FLOOR_F32 = 420, + V_EXP_F32 = 421, + V_LOG_CLAMP_F32 = 422, + V_LOG_F32 = 423, + V_RCP_CLAMP_F32 = 424, + V_RCP_LEGACY_F32 = 425, + V_RCP_F32 = 426, + V_RCP_IFLAG_F32 = 427, + V_RSQ_CLAMP_F32 = 428, + V_RSQ_LEGACY_F32 = 429, + V_RSQ_F32 = 430, + V_RCP_F64 = 431, + V_RCP_CLAMP_F64 = 432, + V_RSQ_F64 = 433, + V_RSQ_CLAMP_F64 = 434, + V_SQRT_F32 = 435, + V_SQRT_F64 = 436, + V_SIN_F32 = 437, + V_COS_F32 = 438, + V_NOT_B32 = 439, + V_BFREV_B32 = 440, + V_FFBH_U32 = 441, + V_FFBL_B32 = 442, + V_FFBH_I32 = 443, + V_FREXP_EXP_I32_F64 = 444, + V_FREXP_MANT_F64 = 445, + V_FRACT_F64 = 446, + V_FREXP_EXP_I32_F32 = 447, + V_FREXP_MANT_F32 = 448, + V_CLREXCP = 449, + V_MOVRELD_B32 = 450, + V_MOVRELS_B32 = 451, + V_MOVRELSD_B32 = 452, + V_LOG_LEGACY_F32 = 453, + V_EXP_LEGACY_F32 = 454, + + OP_RANGE_VOP3 = V_EXP_LEGACY_F32 + 1, +}; + +enum class OpcodeVOP1 : u32 { + V_NOP = 0, + V_MOV_B32 = 1, + V_READFIRSTLANE_B32 = 2, + V_CVT_I32_F64 = 3, + V_CVT_F64_I32 = 4, + V_CVT_F32_I32 = 5, + V_CVT_F32_U32 = 6, + V_CVT_U32_F32 = 7, + V_CVT_I32_F32 = 8, + V_MOV_FED_B32 = 9, + V_CVT_F16_F32 = 10, + V_CVT_F32_F16 = 11, + V_CVT_RPI_I32_F32 = 12, + V_CVT_FLR_I32_F32 = 13, + V_CVT_OFF_F32_I4 = 14, + V_CVT_F32_F64 = 15, + V_CVT_F64_F32 = 16, + V_CVT_F32_UBYTE0 = 17, + V_CVT_F32_UBYTE1 = 18, + V_CVT_F32_UBYTE2 = 19, + V_CVT_F32_UBYTE3 = 20, + V_CVT_U32_F64 = 21, + V_CVT_F64_U32 = 22, + V_TRUNC_F64 = 23, + V_CEIL_F64 = 24, + V_RNDNE_F64 = 25, + V_FLOOR_F64 = 26, + V_FRACT_F32 = 32, + V_TRUNC_F32 = 33, + V_CEIL_F32 = 34, + V_RNDNE_F32 = 35, + V_FLOOR_F32 = 36, + V_EXP_F32 = 37, + V_LOG_CLAMP_F32 = 38, + V_LOG_F32 = 39, + V_RCP_CLAMP_F32 = 40, + V_RCP_LEGACY_F32 = 41, + V_RCP_F32 = 42, + V_RCP_IFLAG_F32 = 43, + V_RSQ_CLAMP_F32 = 44, + V_RSQ_LEGACY_F32 = 45, + V_RSQ_F32 = 46, + V_RCP_F64 = 47, + V_RCP_CLAMP_F64 = 48, + V_RSQ_F64 = 49, + V_RSQ_CLAMP_F64 = 50, + V_SQRT_F32 = 51, + V_SQRT_F64 = 52, + V_SIN_F32 = 53, + V_COS_F32 = 54, + V_NOT_B32 = 55, + V_BFREV_B32 = 56, + V_FFBH_U32 = 57, + V_FFBL_B32 = 58, + V_FFBH_I32 = 59, + V_FREXP_EXP_I32_F64 = 60, + V_FREXP_MANT_F64 = 61, + V_FRACT_F64 = 62, + V_FREXP_EXP_I32_F32 = 63, + V_FREXP_MANT_F32 = 64, + V_CLREXCP = 65, + V_MOVRELD_B32 = 66, + V_MOVRELS_B32 = 67, + V_MOVRELSD_B32 = 68, + V_LOG_LEGACY_F32 = 69, + V_EXP_LEGACY_F32 = 70, + + OP_RANGE_VOP1 = V_EXP_LEGACY_F32 + 1, +}; + +enum class OpcodeVOPC : u32 { + V_CMP_F_F32 = 0, + V_CMP_LT_F32 = 1, + V_CMP_EQ_F32 = 2, + V_CMP_LE_F32 = 3, + V_CMP_GT_F32 = 4, + V_CMP_LG_F32 = 5, + V_CMP_GE_F32 = 6, + V_CMP_O_F32 = 7, + V_CMP_U_F32 = 8, + V_CMP_NGE_F32 = 9, + V_CMP_NLG_F32 = 10, + V_CMP_NGT_F32 = 11, + V_CMP_NLE_F32 = 12, + V_CMP_NEQ_F32 = 13, + V_CMP_NLT_F32 = 14, + V_CMP_TRU_F32 = 15, + V_CMP_T_F32 = 15, + V_CMPX_F_F32 = 16, + V_CMPX_LT_F32 = 17, + V_CMPX_EQ_F32 = 18, + V_CMPX_LE_F32 = 19, + V_CMPX_GT_F32 = 20, + V_CMPX_LG_F32 = 21, + V_CMPX_GE_F32 = 22, + V_CMPX_O_F32 = 23, + V_CMPX_U_F32 = 24, + V_CMPX_NGE_F32 = 25, + V_CMPX_NLG_F32 = 26, + V_CMPX_NGT_F32 = 27, + V_CMPX_NLE_F32 = 28, + V_CMPX_NEQ_F32 = 29, + V_CMPX_NLT_F32 = 30, + V_CMPX_TRU_F32 = 31, + V_CMPX_T_F32 = 31, + V_CMP_F_F64 = 32, + V_CMP_LT_F64 = 33, + V_CMP_EQ_F64 = 34, + V_CMP_LE_F64 = 35, + V_CMP_GT_F64 = 36, + V_CMP_LG_F64 = 37, + V_CMP_GE_F64 = 
38, + V_CMP_O_F64 = 39, + V_CMP_U_F64 = 40, + V_CMP_NGE_F64 = 41, + V_CMP_NLG_F64 = 42, + V_CMP_NGT_F64 = 43, + V_CMP_NLE_F64 = 44, + V_CMP_NEQ_F64 = 45, + V_CMP_NLT_F64 = 46, + V_CMP_TRU_F64 = 47, + V_CMP_T_F64 = 47, + V_CMPX_F_F64 = 48, + V_CMPX_LT_F64 = 49, + V_CMPX_EQ_F64 = 50, + V_CMPX_LE_F64 = 51, + V_CMPX_GT_F64 = 52, + V_CMPX_LG_F64 = 53, + V_CMPX_GE_F64 = 54, + V_CMPX_O_F64 = 55, + V_CMPX_U_F64 = 56, + V_CMPX_NGE_F64 = 57, + V_CMPX_NLG_F64 = 58, + V_CMPX_NGT_F64 = 59, + V_CMPX_NLE_F64 = 60, + V_CMPX_NEQ_F64 = 61, + V_CMPX_NLT_F64 = 62, + V_CMPX_TRU_F64 = 63, + V_CMPX_T_F64 = 63, + V_CMPS_F_F32 = 64, + V_CMPS_LT_F32 = 65, + V_CMPS_EQ_F32 = 66, + V_CMPS_LE_F32 = 67, + V_CMPS_GT_F32 = 68, + V_CMPS_LG_F32 = 69, + V_CMPS_GE_F32 = 70, + V_CMPS_O_F32 = 71, + V_CMPS_U_F32 = 72, + V_CMPS_NGE_F32 = 73, + V_CMPS_NLG_F32 = 74, + V_CMPS_NGT_F32 = 75, + V_CMPS_NLE_F32 = 76, + V_CMPS_NEQ_F32 = 77, + V_CMPS_NLT_F32 = 78, + V_CMPS_TRU_F32 = 79, + V_CMPS_T_F32 = 79, + V_CMPSX_F_F32 = 80, + V_CMPSX_LT_F32 = 81, + V_CMPSX_EQ_F32 = 82, + V_CMPSX_LE_F32 = 83, + V_CMPSX_GT_F32 = 84, + V_CMPSX_LG_F32 = 85, + V_CMPSX_GE_F32 = 86, + V_CMPSX_O_F32 = 87, + V_CMPSX_U_F32 = 88, + V_CMPSX_NGE_F32 = 89, + V_CMPSX_NLG_F32 = 90, + V_CMPSX_NGT_F32 = 91, + V_CMPSX_NLE_F32 = 92, + V_CMPSX_NEQ_F32 = 93, + V_CMPSX_NLT_F32 = 94, + V_CMPSX_TRU_F32 = 95, + V_CMPSX_T_F32 = 95, + V_CMPS_F_F64 = 96, + V_CMPS_LT_F64 = 97, + V_CMPS_EQ_F64 = 98, + V_CMPS_LE_F64 = 99, + V_CMPS_GT_F64 = 100, + V_CMPS_LG_F64 = 101, + V_CMPS_GE_F64 = 102, + V_CMPS_O_F64 = 103, + V_CMPS_U_F64 = 104, + V_CMPS_NGE_F64 = 105, + V_CMPS_NLG_F64 = 106, + V_CMPS_NGT_F64 = 107, + V_CMPS_NLE_F64 = 108, + V_CMPS_NEQ_F64 = 109, + V_CMPS_NLT_F64 = 110, + V_CMPS_TRU_F64 = 111, + V_CMPS_T_F64 = 111, + V_CMPSX_F_F64 = 112, + V_CMPSX_LT_F64 = 113, + V_CMPSX_EQ_F64 = 114, + V_CMPSX_LE_F64 = 115, + V_CMPSX_GT_F64 = 116, + V_CMPSX_LG_F64 = 117, + V_CMPSX_GE_F64 = 118, + V_CMPSX_O_F64 = 119, + V_CMPSX_U_F64 = 120, + V_CMPSX_NGE_F64 = 121, + V_CMPSX_NLG_F64 = 122, + V_CMPSX_NGT_F64 = 123, + V_CMPSX_NLE_F64 = 124, + V_CMPSX_NEQ_F64 = 125, + V_CMPSX_NLT_F64 = 126, + V_CMPSX_TRU_F64 = 127, + V_CMPSX_T_F64 = 127, + V_CMP_F_I32 = 128, + V_CMP_LT_I32 = 129, + V_CMP_EQ_I32 = 130, + V_CMP_LE_I32 = 131, + V_CMP_GT_I32 = 132, + V_CMP_LG_I32 = 133, + V_CMP_NE_I32 = 133, + V_CMP_GE_I32 = 134, + V_CMP_TRU_I32 = 135, + V_CMP_T_I32 = 135, + V_CMP_CLASS_F32 = 136, + V_CMPX_F_I32 = 144, + V_CMPX_LT_I32 = 145, + V_CMPX_EQ_I32 = 146, + V_CMPX_LE_I32 = 147, + V_CMPX_GT_I32 = 148, + V_CMPX_LG_I32 = 149, + V_CMPX_NE_I32 = 149, + V_CMPX_GE_I32 = 150, + V_CMPX_TRU_I32 = 151, + V_CMPX_T_I32 = 151, + V_CMPX_CLASS_F32 = 152, + V_CMP_F_I64 = 160, + V_CMP_LT_I64 = 161, + V_CMP_EQ_I64 = 162, + V_CMP_LE_I64 = 163, + V_CMP_GT_I64 = 164, + V_CMP_LG_I64 = 165, + V_CMP_NE_I64 = 165, + V_CMP_GE_I64 = 166, + V_CMP_TRU_I64 = 167, + V_CMP_T_I64 = 167, + V_CMP_CLASS_F64 = 168, + V_CMPX_F_I64 = 176, + V_CMPX_LT_I64 = 177, + V_CMPX_EQ_I64 = 178, + V_CMPX_LE_I64 = 179, + V_CMPX_GT_I64 = 180, + V_CMPX_LG_I64 = 181, + V_CMPX_NE_I64 = 181, + V_CMPX_GE_I64 = 182, + V_CMPX_TRU_I64 = 183, + V_CMPX_T_I64 = 183, + V_CMPX_CLASS_F64 = 184, + V_CMP_F_U32 = 192, + V_CMP_LT_U32 = 193, + V_CMP_EQ_U32 = 194, + V_CMP_LE_U32 = 195, + V_CMP_GT_U32 = 196, + V_CMP_LG_U32 = 197, + V_CMP_NE_U32 = 197, + V_CMP_GE_U32 = 198, + V_CMP_TRU_U32 = 199, + V_CMP_T_U32 = 199, + V_CMPX_F_U32 = 208, + V_CMPX_LT_U32 = 209, + V_CMPX_EQ_U32 = 210, + V_CMPX_LE_U32 = 211, + V_CMPX_GT_U32 = 212, + V_CMPX_LG_U32 = 213, + V_CMPX_NE_U32 = 213, + 
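+    // VOPC compares write one result bit per active lane into VCC; the
+    // V_CMPX_* variants additionally copy that mask into EXEC.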
V_CMPX_GE_U32 = 214, + V_CMPX_TRU_U32 = 215, + V_CMPX_T_U32 = 215, + V_CMP_F_U64 = 224, + V_CMP_LT_U64 = 225, + V_CMP_EQ_U64 = 226, + V_CMP_LE_U64 = 227, + V_CMP_GT_U64 = 228, + V_CMP_LG_U64 = 229, + V_CMP_NE_U64 = 229, + V_CMP_GE_U64 = 230, + V_CMP_TRU_U64 = 231, + V_CMP_T_U64 = 231, + V_CMPX_F_U64 = 240, + V_CMPX_LT_U64 = 241, + V_CMPX_EQ_U64 = 242, + V_CMPX_LE_U64 = 243, + V_CMPX_GT_U64 = 244, + V_CMPX_LG_U64 = 245, + V_CMPX_NE_U64 = 245, + V_CMPX_GE_U64 = 246, + V_CMPX_TRU_U64 = 247, + V_CMPX_T_U64 = 247, + + OP_RANGE_VOPC = V_CMPX_T_U64 + 1, +}; + +enum class OpcodeVINTRP : u32 { + V_INTERP_P1_F32 = 0, + V_INTERP_P2_F32 = 1, + V_INTERP_MOV_F32 = 2, + + OP_RANGE_VINTRP = V_INTERP_MOV_F32 + 1, +}; + +enum class OpcodeDS : u32 { + DS_ADD_U32 = 0, + DS_SUB_U32 = 1, + DS_RSUB_U32 = 2, + DS_INC_U32 = 3, + DS_DEC_U32 = 4, + DS_MIN_I32 = 5, + DS_MAX_I32 = 6, + DS_MIN_U32 = 7, + DS_MAX_U32 = 8, + DS_AND_B32 = 9, + DS_OR_B32 = 10, + DS_XOR_B32 = 11, + DS_MSKOR_B32 = 12, + DS_WRITE_B32 = 13, + DS_WRITE2_B32 = 14, + DS_WRITE2ST64_B32 = 15, + DS_CMPST_B32 = 16, + DS_CMPST_F32 = 17, + DS_MIN_F32 = 18, + DS_MAX_F32 = 19, + DS_NOP = 20, + DS_GWS_SEMA_RELEASE_ALL = 24, + DS_GWS_INIT = 25, + DS_GWS_SEMA_V = 26, + DS_GWS_SEMA_BR = 27, + DS_GWS_SEMA_P = 28, + DS_GWS_BARRIER = 29, + DS_WRITE_B8 = 30, + DS_WRITE_B16 = 31, + DS_ADD_RTN_U32 = 32, + DS_SUB_RTN_U32 = 33, + DS_RSUB_RTN_U32 = 34, + DS_INC_RTN_U32 = 35, + DS_DEC_RTN_U32 = 36, + DS_MIN_RTN_I32 = 37, + DS_MAX_RTN_I32 = 38, + DS_MIN_RTN_U32 = 39, + DS_MAX_RTN_U32 = 40, + DS_AND_RTN_B32 = 41, + DS_OR_RTN_B32 = 42, + DS_XOR_RTN_B32 = 43, + DS_MSKOR_RTN_B32 = 44, + DS_WRXCHG_RTN_B32 = 45, + DS_WRXCHG2_RTN_B32 = 46, + DS_WRXCHG2ST64_RTN_B32 = 47, + DS_CMPST_RTN_B32 = 48, + DS_CMPST_RTN_F32 = 49, + DS_MIN_RTN_F32 = 50, + DS_MAX_RTN_F32 = 51, + DS_WRAP_RTN_B32 = 52, + DS_SWIZZLE_B32 = 53, + DS_READ_B32 = 54, + DS_READ2_B32 = 55, + DS_READ2ST64_B32 = 56, + DS_READ_I8 = 57, + DS_READ_U8 = 58, + DS_READ_I16 = 59, + DS_READ_U16 = 60, + DS_CONSUME = 61, + DS_APPEND = 62, + DS_ORDERED_COUNT = 63, + DS_ADD_U64 = 64, + DS_SUB_U64 = 65, + DS_RSUB_U64 = 66, + DS_INC_U64 = 67, + DS_DEC_U64 = 68, + DS_MIN_I64 = 69, + DS_MAX_I64 = 70, + DS_MIN_U64 = 71, + DS_MAX_U64 = 72, + DS_AND_B64 = 73, + DS_OR_B64 = 74, + DS_XOR_B64 = 75, + DS_MSKOR_B64 = 76, + DS_WRITE_B64 = 77, + DS_WRITE2_B64 = 78, + DS_WRITE2ST64_B64 = 79, + DS_CMPST_B64 = 80, + DS_CMPST_F64 = 81, + DS_MIN_F64 = 82, + DS_MAX_F64 = 83, + DS_ADD_RTN_U64 = 96, + DS_SUB_RTN_U64 = 97, + DS_RSUB_RTN_U64 = 98, + DS_INC_RTN_U64 = 99, + DS_DEC_RTN_U64 = 100, + DS_MIN_RTN_I64 = 101, + DS_MAX_RTN_I64 = 102, + DS_MIN_RTN_U64 = 103, + DS_MAX_RTN_U64 = 104, + DS_AND_RTN_B64 = 105, + DS_OR_RTN_B64 = 106, + DS_XOR_RTN_B64 = 107, + DS_MSKOR_RTN_B64 = 108, + DS_WRXCHG_RTN_B64 = 109, + DS_WRXCHG2_RTN_B64 = 110, + DS_WRXCHG2ST64_RTN_B64 = 111, + DS_CMPST_RTN_B64 = 112, + DS_CMPST_RTN_F64 = 113, + DS_MIN_RTN_F64 = 114, + DS_MAX_RTN_F64 = 115, + DS_READ_B64 = 118, + DS_READ2_B64 = 119, + DS_READ2ST64_B64 = 120, + DS_CONDXCHG32_RTN_B64 = 126, + DS_ADD_SRC2_U32 = 128, + DS_SUB_SRC2_U32 = 129, + DS_RSUB_SRC2_U32 = 130, + DS_INC_SRC2_U32 = 131, + DS_DEC_SRC2_U32 = 132, + DS_MIN_SRC2_I32 = 133, + DS_MAX_SRC2_I32 = 134, + DS_MIN_SRC2_U32 = 135, + DS_MAX_SRC2_U32 = 136, + DS_AND_SRC2_B32 = 137, + DS_OR_SRC2_B32 = 138, + DS_XOR_SRC2_B32 = 139, + DS_WRITE_SRC2_B32 = 141, + DS_MIN_SRC2_F32 = 146, + DS_MAX_SRC2_F32 = 147, + DS_ADD_SRC2_U64 = 192, + DS_SUB_SRC2_U64 = 193, + DS_RSUB_SRC2_U64 = 194, + DS_INC_SRC2_U64 = 195, + DS_DEC_SRC2_U64 = 
196, + DS_MIN_SRC2_I64 = 197, + DS_MAX_SRC2_I64 = 198, + DS_MIN_SRC2_U64 = 199, + DS_MAX_SRC2_U64 = 200, + DS_AND_SRC2_B64 = 201, + DS_OR_SRC2_B64 = 202, + DS_XOR_SRC2_B64 = 203, + DS_WRITE_SRC2_B64 = 205, + DS_MIN_SRC2_F64 = 210, + DS_MAX_SRC2_F64 = 211, + DS_WRITE_B96 = 222, + DS_WRITE_B128 = 223, + DS_CONDXCHG32_RTN_B128 = 253, + DS_READ_B96 = 254, + DS_READ_B128 = 255, + + OP_RANGE_DS = DS_READ_B128 + 1, +}; + +enum class OpcodeMUBUF : u32 { + BUFFER_LOAD_FORMAT_X = 0, + BUFFER_LOAD_FORMAT_XY = 1, + BUFFER_LOAD_FORMAT_XYZ = 2, + BUFFER_LOAD_FORMAT_XYZW = 3, + BUFFER_STORE_FORMAT_X = 4, + BUFFER_STORE_FORMAT_XY = 5, + BUFFER_STORE_FORMAT_XYZ = 6, + BUFFER_STORE_FORMAT_XYZW = 7, + BUFFER_LOAD_UBYTE = 8, + BUFFER_LOAD_SBYTE = 9, + BUFFER_LOAD_USHORT = 10, + BUFFER_LOAD_SSHORT = 11, + BUFFER_LOAD_DWORD = 12, + BUFFER_LOAD_DWORDX2 = 13, + BUFFER_LOAD_DWORDX4 = 14, + BUFFER_LOAD_DWORDX3 = 15, + BUFFER_STORE_BYTE = 24, + BUFFER_STORE_SHORT = 26, + BUFFER_STORE_DWORD = 28, + BUFFER_STORE_DWORDX2 = 29, + BUFFER_STORE_DWORDX4 = 30, + BUFFER_STORE_DWORDX3 = 31, + BUFFER_ATOMIC_SWAP = 48, + BUFFER_ATOMIC_CMPSWAP = 49, + BUFFER_ATOMIC_ADD = 50, + BUFFER_ATOMIC_SUB = 51, + BUFFER_ATOMIC_SMIN = 53, + BUFFER_ATOMIC_UMIN = 54, + BUFFER_ATOMIC_SMAX = 55, + BUFFER_ATOMIC_UMAX = 56, + BUFFER_ATOMIC_AND = 57, + BUFFER_ATOMIC_OR = 58, + BUFFER_ATOMIC_XOR = 59, + BUFFER_ATOMIC_INC = 60, + BUFFER_ATOMIC_DEC = 61, + BUFFER_ATOMIC_FCMPSWAP = 62, + BUFFER_ATOMIC_FMIN = 63, + BUFFER_ATOMIC_FMAX = 64, + BUFFER_ATOMIC_SWAP_X2 = 80, + BUFFER_ATOMIC_CMPSWAP_X2 = 81, + BUFFER_ATOMIC_ADD_X2 = 82, + BUFFER_ATOMIC_SUB_X2 = 83, + BUFFER_ATOMIC_SMIN_X2 = 85, + BUFFER_ATOMIC_UMIN_X2 = 86, + BUFFER_ATOMIC_SMAX_X2 = 87, + BUFFER_ATOMIC_UMAX_X2 = 88, + BUFFER_ATOMIC_AND_X2 = 89, + BUFFER_ATOMIC_OR_X2 = 90, + BUFFER_ATOMIC_XOR_X2 = 91, + BUFFER_ATOMIC_INC_X2 = 92, + BUFFER_ATOMIC_DEC_X2 = 93, + BUFFER_ATOMIC_FCMPSWAP_X2 = 94, + BUFFER_ATOMIC_FMIN_X2 = 95, + BUFFER_ATOMIC_FMAX_X2 = 96, + BUFFER_WBINVL1_SC = 112, + BUFFER_WBINVL1 = 113, + + OP_RANGE_MUBUF = BUFFER_WBINVL1 + 1, +}; + +enum class OpcodeMTBUF : u32 { + TBUFFER_LOAD_FORMAT_X = 0, + TBUFFER_LOAD_FORMAT_XY = 1, + TBUFFER_LOAD_FORMAT_XYZ = 2, + TBUFFER_LOAD_FORMAT_XYZW = 3, + TBUFFER_STORE_FORMAT_X = 4, + TBUFFER_STORE_FORMAT_XY = 5, + TBUFFER_STORE_FORMAT_XYZ = 6, + TBUFFER_STORE_FORMAT_XYZW = 7, + + OP_RANGE_MTBUF = TBUFFER_STORE_FORMAT_XYZW + 1, +}; + +enum class OpcodeMIMG : u32 { + IMAGE_LOAD = 0, + IMAGE_LOAD_MIP = 1, + IMAGE_LOAD_PCK = 2, + IMAGE_LOAD_PCK_SGN = 3, + IMAGE_LOAD_MIP_PCK = 4, + IMAGE_LOAD_MIP_PCK_SGN = 5, + IMAGE_STORE = 8, + IMAGE_STORE_MIP = 9, + IMAGE_STORE_PCK = 10, + IMAGE_STORE_MIP_PCK = 11, + IMAGE_GET_RESINFO = 14, + IMAGE_ATOMIC_SWAP = 15, + IMAGE_ATOMIC_CMPSWAP = 16, + IMAGE_ATOMIC_ADD = 17, + IMAGE_ATOMIC_SUB = 18, + IMAGE_ATOMIC_SMIN = 20, + IMAGE_ATOMIC_UMIN = 21, + IMAGE_ATOMIC_SMAX = 22, + IMAGE_ATOMIC_UMAX = 23, + IMAGE_ATOMIC_AND = 24, + IMAGE_ATOMIC_OR = 25, + IMAGE_ATOMIC_XOR = 26, + IMAGE_ATOMIC_INC = 27, + IMAGE_ATOMIC_DEC = 28, + IMAGE_ATOMIC_FCMPSWAP = 29, + IMAGE_ATOMIC_FMIN = 30, + IMAGE_ATOMIC_FMAX = 31, + IMAGE_SAMPLE = 32, + IMAGE_SAMPLE_CL = 33, + IMAGE_SAMPLE_D = 34, + IMAGE_SAMPLE_D_CL = 35, + IMAGE_SAMPLE_L = 36, + IMAGE_SAMPLE_B = 37, + IMAGE_SAMPLE_B_CL = 38, + IMAGE_SAMPLE_LZ = 39, + IMAGE_SAMPLE_C = 40, + IMAGE_SAMPLE_C_CL = 41, + IMAGE_SAMPLE_C_D = 42, + IMAGE_SAMPLE_C_D_CL = 43, + IMAGE_SAMPLE_C_L = 44, + IMAGE_SAMPLE_C_B = 45, + IMAGE_SAMPLE_C_B_CL = 46, + IMAGE_SAMPLE_C_LZ = 47, + IMAGE_SAMPLE_O = 48, + 
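+    // Sample opcode suffixes: _C = depth compare, _D = user derivatives,
+    // _CD = coarse user derivatives, _L = explicit LOD, _B = LOD bias,
+    // _LZ = LOD zero, _CL = LOD clamp, _O = texel offsets.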
IMAGE_SAMPLE_CL_O = 49, + IMAGE_SAMPLE_D_O = 50, + IMAGE_SAMPLE_D_CL_O = 51, + IMAGE_SAMPLE_L_O = 52, + IMAGE_SAMPLE_B_O = 53, + IMAGE_SAMPLE_B_CL_O = 54, + IMAGE_SAMPLE_LZ_O = 55, + IMAGE_SAMPLE_C_O = 56, + IMAGE_SAMPLE_C_CL_O = 57, + IMAGE_SAMPLE_C_D_O = 58, + IMAGE_SAMPLE_C_D_CL_O = 59, + IMAGE_SAMPLE_C_L_O = 60, + IMAGE_SAMPLE_C_B_O = 61, + IMAGE_SAMPLE_C_B_CL_O = 62, + IMAGE_SAMPLE_C_LZ_O = 63, + IMAGE_GATHER4 = 64, + IMAGE_GATHER4_CL = 65, + IMAGE_GATHER4_L = 68, + IMAGE_GATHER4_B = 69, + IMAGE_GATHER4_B_CL = 70, + IMAGE_GATHER4_LZ = 71, + IMAGE_GATHER4_C = 72, + IMAGE_GATHER4_C_CL = 73, + IMAGE_GATHER4_C_L = 76, + IMAGE_GATHER4_C_B = 77, + IMAGE_GATHER4_C_B_CL = 78, + IMAGE_GATHER4_C_LZ = 79, + IMAGE_GATHER4_O = 80, + IMAGE_GATHER4_CL_O = 81, + IMAGE_GATHER4_L_O = 84, + IMAGE_GATHER4_B_O = 85, + IMAGE_GATHER4_B_CL_O = 86, + IMAGE_GATHER4_LZ_O = 87, + IMAGE_GATHER4_C_O = 88, + IMAGE_GATHER4_C_CL_O = 89, + IMAGE_GATHER4_C_L_O = 92, + IMAGE_GATHER4_C_B_O = 93, + IMAGE_GATHER4_C_B_CL_O = 94, + IMAGE_GATHER4_C_LZ_O = 95, + IMAGE_GET_LOD = 96, + IMAGE_SAMPLE_CD = 104, + IMAGE_SAMPLE_CD_CL = 105, + IMAGE_SAMPLE_C_CD = 106, + IMAGE_SAMPLE_C_CD_CL = 107, + IMAGE_SAMPLE_CD_O = 108, + IMAGE_SAMPLE_CD_CL_O = 109, + IMAGE_SAMPLE_C_CD_O = 110, + IMAGE_SAMPLE_C_CD_CL_O = 111, + + OP_RANGE_MIMG = IMAGE_SAMPLE_C_CD_CL_O + 1, +}; + +enum class OpcodeEXP : u32 { + EXP = 0, + + OP_RANGE_EXP = EXP + 1, +}; + +// Use to map VOP3 opcodes into VOP1, VOP2, VOPC +enum class OpMapVOP3VOPX : u32 { + VOP3_TO_VOPC = 0, + VOP3_TO_VOP2 = 256, + VOP3_TO_VOP1 = 384, +}; + +// Use to map all opcodes into a single enum +enum class OpcodeMap : u32 { + OP_MAP_SOP2 = 0, + OP_MAP_SOPK = OP_MAP_SOP2 + (u32)OpcodeSOP2::OP_RANGE_SOP2, + OP_MAP_SOP1 = OP_MAP_SOPK + (u32)OpcodeSOPK::OP_RANGE_SOPK, + OP_MAP_SOPC = OP_MAP_SOP1 + (u32)OpcodeSOP1::OP_RANGE_SOP1, + OP_MAP_SOPP = OP_MAP_SOPC + (u32)OpcodeSOPC::OP_RANGE_SOPC, + OP_MAP_VOPC = OP_MAP_SOPP + (u32)OpcodeSOPP::OP_RANGE_SOPP, + OP_MAP_VOP2 = OP_MAP_VOPC + (u32)OpcodeVOPC::OP_RANGE_VOPC, + OP_MAP_VOP1 = OP_MAP_VOP2 + (u32)OpcodeVOP2::OP_RANGE_VOP2, + OP_MAP_VOP3 = OP_MAP_VOP1 + (u32)OpcodeVOP1::OP_RANGE_VOP1, + OP_MAP_VINTRP = OP_MAP_VOP3 + (u32)OpcodeVOP3::OP_RANGE_VOP3, + OP_MAP_SMRD = OP_MAP_VINTRP + (u32)OpcodeVINTRP::OP_RANGE_VINTRP, + OP_MAP_DS = OP_MAP_SMRD + (u32)OpcodeSMRD::OP_RANGE_SMRD, + OP_MAP_MUBUF = OP_MAP_DS + (u32)OpcodeDS::OP_RANGE_DS, + OP_MAP_MTBUF = OP_MAP_MUBUF + (u32)OpcodeMUBUF::OP_RANGE_MUBUF, + OP_MAP_MIMG = OP_MAP_MTBUF + (u32)OpcodeMTBUF::OP_RANGE_MTBUF, + OP_MAP_EXP = OP_MAP_MIMG + (u32)OpcodeMIMG::OP_RANGE_MIMG, +}; + +enum class Opcode : u32 { + // SOP2 + S_ADD_U32 = 0 + (u32)OpcodeMap::OP_MAP_SOP2, + S_SUB_U32 = 1 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ADD_I32 = 2 + (u32)OpcodeMap::OP_MAP_SOP2, + S_SUB_I32 = 3 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ADDC_U32 = 4 + (u32)OpcodeMap::OP_MAP_SOP2, + S_SUBB_U32 = 5 + (u32)OpcodeMap::OP_MAP_SOP2, + S_MIN_I32 = 6 + (u32)OpcodeMap::OP_MAP_SOP2, + S_MIN_U32 = 7 + (u32)OpcodeMap::OP_MAP_SOP2, + S_MAX_I32 = 8 + (u32)OpcodeMap::OP_MAP_SOP2, + S_MAX_U32 = 9 + (u32)OpcodeMap::OP_MAP_SOP2, + S_CSELECT_B32 = 10 + (u32)OpcodeMap::OP_MAP_SOP2, + S_CSELECT_B64 = 11 + (u32)OpcodeMap::OP_MAP_SOP2, + S_AND_B32 = 14 + (u32)OpcodeMap::OP_MAP_SOP2, + S_AND_B64 = 15 + (u32)OpcodeMap::OP_MAP_SOP2, + S_OR_B32 = 16 + (u32)OpcodeMap::OP_MAP_SOP2, + S_OR_B64 = 17 + (u32)OpcodeMap::OP_MAP_SOP2, + S_XOR_B32 = 18 + (u32)OpcodeMap::OP_MAP_SOP2, + S_XOR_B64 = 19 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ANDN2_B32 = 20 + 
(u32)OpcodeMap::OP_MAP_SOP2, + S_ANDN2_B64 = 21 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ORN2_B32 = 22 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ORN2_B64 = 23 + (u32)OpcodeMap::OP_MAP_SOP2, + S_NAND_B32 = 24 + (u32)OpcodeMap::OP_MAP_SOP2, + S_NAND_B64 = 25 + (u32)OpcodeMap::OP_MAP_SOP2, + S_NOR_B32 = 26 + (u32)OpcodeMap::OP_MAP_SOP2, + S_NOR_B64 = 27 + (u32)OpcodeMap::OP_MAP_SOP2, + S_XNOR_B32 = 28 + (u32)OpcodeMap::OP_MAP_SOP2, + S_XNOR_B64 = 29 + (u32)OpcodeMap::OP_MAP_SOP2, + S_LSHL_B32 = 30 + (u32)OpcodeMap::OP_MAP_SOP2, + S_LSHL_B64 = 31 + (u32)OpcodeMap::OP_MAP_SOP2, + S_LSHR_B32 = 32 + (u32)OpcodeMap::OP_MAP_SOP2, + S_LSHR_B64 = 33 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ASHR_I32 = 34 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ASHR_I64 = 35 + (u32)OpcodeMap::OP_MAP_SOP2, + S_BFM_B32 = 36 + (u32)OpcodeMap::OP_MAP_SOP2, + S_BFM_B64 = 37 + (u32)OpcodeMap::OP_MAP_SOP2, + S_MUL_I32 = 38 + (u32)OpcodeMap::OP_MAP_SOP2, + S_BFE_U32 = 39 + (u32)OpcodeMap::OP_MAP_SOP2, + S_BFE_I32 = 40 + (u32)OpcodeMap::OP_MAP_SOP2, + S_BFE_U64 = 41 + (u32)OpcodeMap::OP_MAP_SOP2, + S_BFE_I64 = 42 + (u32)OpcodeMap::OP_MAP_SOP2, + S_CBRANCH_G_FORK = 43 + (u32)OpcodeMap::OP_MAP_SOP2, + S_ABSDIFF_I32 = 44 + (u32)OpcodeMap::OP_MAP_SOP2, + // SOPK + S_MOVK_I32 = 0 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMOVK_I32 = 2 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_EQ_I32 = 3 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_LG_I32 = 4 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_GT_I32 = 5 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_GE_I32 = 6 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_LT_I32 = 7 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_LE_I32 = 8 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_EQ_U32 = 9 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_LG_U32 = 10 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_GT_U32 = 11 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_GE_U32 = 12 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_LT_U32 = 13 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CMPK_LE_U32 = 14 + (u32)OpcodeMap::OP_MAP_SOPK, + S_ADDK_I32 = 15 + (u32)OpcodeMap::OP_MAP_SOPK, + S_MULK_I32 = 16 + (u32)OpcodeMap::OP_MAP_SOPK, + S_CBRANCH_I_FORK = 17 + (u32)OpcodeMap::OP_MAP_SOPK, + S_GETREG_B32 = 18 + (u32)OpcodeMap::OP_MAP_SOPK, + S_SETREG_B32 = 19 + (u32)OpcodeMap::OP_MAP_SOPK, + S_GETREG_REGRD_B32 = 20 + (u32)OpcodeMap::OP_MAP_SOPK, + S_SETREG_IMM32_B32 = 21 + (u32)OpcodeMap::OP_MAP_SOPK, + // SOP1 + S_MOV_B32 = 3 + (u32)OpcodeMap::OP_MAP_SOP1, + S_MOV_B64 = 4 + (u32)OpcodeMap::OP_MAP_SOP1, + S_CMOV_B32 = 5 + (u32)OpcodeMap::OP_MAP_SOP1, + S_CMOV_B64 = 6 + (u32)OpcodeMap::OP_MAP_SOP1, + S_NOT_B32 = 7 + (u32)OpcodeMap::OP_MAP_SOP1, + S_NOT_B64 = 8 + (u32)OpcodeMap::OP_MAP_SOP1, + S_WQM_B32 = 9 + (u32)OpcodeMap::OP_MAP_SOP1, + S_WQM_B64 = 10 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BREV_B32 = 11 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BREV_B64 = 12 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BCNT0_I32_B32 = 13 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BCNT0_I32_B64 = 14 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BCNT1_I32_B32 = 15 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BCNT1_I32_B64 = 16 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FF0_I32_B32 = 17 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FF0_I32_B64 = 18 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FF1_I32_B32 = 19 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FF1_I32_B64 = 20 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FLBIT_I32_B32 = 21 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FLBIT_I32_B64 = 22 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FLBIT_I32 = 23 + (u32)OpcodeMap::OP_MAP_SOP1, + S_FLBIT_I32_I64 = 24 + (u32)OpcodeMap::OP_MAP_SOP1, + S_SEXT_I32_I8 = 25 + (u32)OpcodeMap::OP_MAP_SOP1, + S_SEXT_I32_I16 = 26 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BITSET0_B32 = 27 + 
(u32)OpcodeMap::OP_MAP_SOP1, + S_BITSET0_B64 = 28 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BITSET1_B32 = 29 + (u32)OpcodeMap::OP_MAP_SOP1, + S_BITSET1_B64 = 30 + (u32)OpcodeMap::OP_MAP_SOP1, + S_GETPC_B64 = 31 + (u32)OpcodeMap::OP_MAP_SOP1, + S_SETPC_B64 = 32 + (u32)OpcodeMap::OP_MAP_SOP1, + S_SWAPPC_B64 = 33 + (u32)OpcodeMap::OP_MAP_SOP1, + S_RFE_B64 = 34 + (u32)OpcodeMap::OP_MAP_SOP1, + S_AND_SAVEEXEC_B64 = 36 + (u32)OpcodeMap::OP_MAP_SOP1, + S_OR_SAVEEXEC_B64 = 37 + (u32)OpcodeMap::OP_MAP_SOP1, + S_XOR_SAVEEXEC_B64 = 38 + (u32)OpcodeMap::OP_MAP_SOP1, + S_ANDN2_SAVEEXEC_B64 = 39 + (u32)OpcodeMap::OP_MAP_SOP1, + S_ORN2_SAVEEXEC_B64 = 40 + (u32)OpcodeMap::OP_MAP_SOP1, + S_NAND_SAVEEXEC_B64 = 41 + (u32)OpcodeMap::OP_MAP_SOP1, + S_NOR_SAVEEXEC_B64 = 42 + (u32)OpcodeMap::OP_MAP_SOP1, + S_XNOR_SAVEEXEC_B64 = 43 + (u32)OpcodeMap::OP_MAP_SOP1, + S_QUADMASK_B32 = 44 + (u32)OpcodeMap::OP_MAP_SOP1, + S_QUADMASK_B64 = 45 + (u32)OpcodeMap::OP_MAP_SOP1, + S_MOVRELS_B32 = 46 + (u32)OpcodeMap::OP_MAP_SOP1, + S_MOVRELS_B64 = 47 + (u32)OpcodeMap::OP_MAP_SOP1, + S_MOVRELD_B32 = 48 + (u32)OpcodeMap::OP_MAP_SOP1, + S_MOVRELD_B64 = 49 + (u32)OpcodeMap::OP_MAP_SOP1, + S_CBRANCH_JOIN = 50 + (u32)OpcodeMap::OP_MAP_SOP1, + S_MOV_REGRD_B32 = 51 + (u32)OpcodeMap::OP_MAP_SOP1, + S_ABS_I32 = 52 + (u32)OpcodeMap::OP_MAP_SOP1, + S_MOV_FED_B32 = 53 + (u32)OpcodeMap::OP_MAP_SOP1, + // SOPC + S_CMP_EQ_I32 = 0 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_LG_I32 = 1 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_GT_I32 = 2 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_GE_I32 = 3 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_LT_I32 = 4 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_LE_I32 = 5 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_EQ_U32 = 6 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_LG_U32 = 7 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_GT_U32 = 8 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_GE_U32 = 9 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_LT_U32 = 10 + (u32)OpcodeMap::OP_MAP_SOPC, + S_CMP_LE_U32 = 11 + (u32)OpcodeMap::OP_MAP_SOPC, + S_BITCMP0_B32 = 12 + (u32)OpcodeMap::OP_MAP_SOPC, + S_BITCMP1_B32 = 13 + (u32)OpcodeMap::OP_MAP_SOPC, + S_BITCMP0_B64 = 14 + (u32)OpcodeMap::OP_MAP_SOPC, + S_BITCMP1_B64 = 15 + (u32)OpcodeMap::OP_MAP_SOPC, + S_SETVSKIP = 16 + (u32)OpcodeMap::OP_MAP_SOPC, + // SOPP + S_NOP = 0 + (u32)OpcodeMap::OP_MAP_SOPP, + S_ENDPGM = 1 + (u32)OpcodeMap::OP_MAP_SOPP, + S_BRANCH = 2 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_SCC0 = 4 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_SCC1 = 5 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_VCCZ = 6 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_VCCNZ = 7 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_EXECZ = 8 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_EXECNZ = 9 + (u32)OpcodeMap::OP_MAP_SOPP, + S_BARRIER = 10 + (u32)OpcodeMap::OP_MAP_SOPP, + S_SETKILL = 11 + (u32)OpcodeMap::OP_MAP_SOPP, + S_WAITCNT = 12 + (u32)OpcodeMap::OP_MAP_SOPP, + S_SETHALT = 13 + (u32)OpcodeMap::OP_MAP_SOPP, + S_SLEEP = 14 + (u32)OpcodeMap::OP_MAP_SOPP, + S_SETPRIO = 15 + (u32)OpcodeMap::OP_MAP_SOPP, + S_SENDMSG = 16 + (u32)OpcodeMap::OP_MAP_SOPP, + S_SENDMSGHALT = 17 + (u32)OpcodeMap::OP_MAP_SOPP, + S_TRAP = 18 + (u32)OpcodeMap::OP_MAP_SOPP, + S_ICACHE_INV = 19 + (u32)OpcodeMap::OP_MAP_SOPP, + S_INCPERFLEVEL = 20 + (u32)OpcodeMap::OP_MAP_SOPP, + S_DECPERFLEVEL = 21 + (u32)OpcodeMap::OP_MAP_SOPP, + S_TTRACEDATA = 22 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_CDBGSYS = 23 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_CDBGUSER = 24 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_CDBGSYS_OR_USER = 25 + (u32)OpcodeMap::OP_MAP_SOPP, + S_CBRANCH_CDBGSYS_AND_USER = 26 + 
(u32)OpcodeMap::OP_MAP_SOPP, + // VOPC + V_CMP_F_F32 = 0 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_F32 = 1 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_F32 = 2 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_F32 = 3 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_F32 = 4 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LG_F32 = 5 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_F32 = 6 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_O_F32 = 7 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_U_F32 = 8 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NGE_F32 = 9 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NLG_F32 = 10 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NGT_F32 = 11 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NLE_F32 = 12 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NEQ_F32 = 13 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NLT_F32 = 14 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_TRU_F32 = 15 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_T_F32 = 15 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_F_F32 = 16 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LT_F32 = 17 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_EQ_F32 = 18 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LE_F32 = 19 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GT_F32 = 20 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LG_F32 = 21 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GE_F32 = 22 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_O_F32 = 23 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_U_F32 = 24 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NGE_F32 = 25 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NLG_F32 = 26 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NGT_F32 = 27 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NLE_F32 = 28 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NEQ_F32 = 29 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NLT_F32 = 30 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_TRU_F32 = 31 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_T_F32 = 31 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_F_F64 = 32 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_F64 = 33 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_F64 = 34 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_F64 = 35 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_F64 = 36 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LG_F64 = 37 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_F64 = 38 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_O_F64 = 39 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_U_F64 = 40 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NGE_F64 = 41 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NLG_F64 = 42 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NGT_F64 = 43 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NLE_F64 = 44 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NEQ_F64 = 45 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NLT_F64 = 46 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_TRU_F64 = 47 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_T_F64 = 47 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_F_F64 = 48 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LT_F64 = 49 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_EQ_F64 = 50 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LE_F64 = 51 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GT_F64 = 52 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LG_F64 = 53 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GE_F64 = 54 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_O_F64 = 55 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_U_F64 = 56 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NGE_F64 = 57 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NLG_F64 = 58 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NGT_F64 = 59 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NLE_F64 = 60 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NEQ_F64 = 61 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NLT_F64 = 62 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_TRU_F64 = 63 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_T_F64 = 63 + (u32)OpcodeMap::OP_MAP_VOPC, + 
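// The V_CMPX_* variants above also write the comparison result to the EXEC mask; the V_CMPS_* family below are the signaling-NaN forms of the same floating-point compares. +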
V_CMPS_F_F32 = 64 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_LT_F32 = 65 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_EQ_F32 = 66 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_LE_F32 = 67 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_GT_F32 = 68 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_LG_F32 = 69 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_GE_F32 = 70 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_O_F32 = 71 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_U_F32 = 72 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NGE_F32 = 73 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NLG_F32 = 74 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NGT_F32 = 75 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NLE_F32 = 76 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NEQ_F32 = 77 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NLT_F32 = 78 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_TRU_F32 = 79 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_T_F32 = 79 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_F_F32 = 80 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_LT_F32 = 81 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_EQ_F32 = 82 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_LE_F32 = 83 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_GT_F32 = 84 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_LG_F32 = 85 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_GE_F32 = 86 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_O_F32 = 87 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_U_F32 = 88 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NGE_F32 = 89 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NLG_F32 = 90 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NGT_F32 = 91 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NLE_F32 = 92 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NEQ_F32 = 93 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NLT_F32 = 94 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_TRU_F32 = 95 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_T_F32 = 95 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_F_F64 = 96 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_LT_F64 = 97 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_EQ_F64 = 98 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_LE_F64 = 99 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_GT_F64 = 100 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_LG_F64 = 101 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_GE_F64 = 102 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_O_F64 = 103 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_U_F64 = 104 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NGE_F64 = 105 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NLG_F64 = 106 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NGT_F64 = 107 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NLE_F64 = 108 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NEQ_F64 = 109 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_NLT_F64 = 110 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_TRU_F64 = 111 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPS_T_F64 = 111 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_F_F64 = 112 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_LT_F64 = 113 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_EQ_F64 = 114 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_LE_F64 = 115 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_GT_F64 = 116 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_LG_F64 = 117 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_GE_F64 = 118 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_O_F64 = 119 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_U_F64 = 120 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NGE_F64 = 121 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NLG_F64 = 122 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NGT_F64 = 123 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NLE_F64 = 124 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NEQ_F64 = 125 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_NLT_F64 = 126 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_TRU_F64 = 127 + 
(u32)OpcodeMap::OP_MAP_VOPC, + V_CMPSX_T_F64 = 127 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_F_I32 = 128 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_I32 = 129 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_I32 = 130 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_I32 = 131 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_I32 = 132 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NE_I32 = 133 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_I32 = 134 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_TRU_I32 = 135 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_T_I32 = 135 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_CLASS_F32 = 136 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_F_I32 = 144 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LT_I32 = 145 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_EQ_I32 = 146 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LE_I32 = 147 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GT_I32 = 148 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LG_I32 = 149 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NE_I32 = 149 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GE_I32 = 150 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_TRU_I32 = 151 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_T_I32 = 151 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_CLASS_F32 = 152 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_F_I64 = 160 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_I64 = 161 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_I64 = 162 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_I64 = 163 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_I64 = 164 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LG_I64 = 165 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NE_I64 = 165 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_I64 = 166 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_TRU_I64 = 167 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_T_I64 = 167 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_CLASS_F64 = 168 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_F_I64 = 176 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LT_I64 = 177 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_EQ_I64 = 178 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LE_I64 = 179 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GT_I64 = 180 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LG_I64 = 181 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NE_I64 = 181 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GE_I64 = 182 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_TRU_I64 = 183 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_T_I64 = 183 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_CLASS_F64 = 184 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_F_U32 = 192 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_U32 = 193 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_U32 = 194 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_U32 = 195 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_U32 = 196 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NE_U32 = 197 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_U32 = 198 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_TRU_U32 = 199 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_T_U32 = 199 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_F_U32 = 208 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LT_U32 = 209 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_EQ_U32 = 210 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LE_U32 = 211 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GT_U32 = 212 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NE_U32 = 213 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GE_U32 = 214 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_TRU_U32 = 215 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_T_U32 = 215 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_F_U64 = 224 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_U64 = 225 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_U64 = 226 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_U64 = 227 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_U64 = 228 + (u32)OpcodeMap::OP_MAP_VOPC, + 
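// Several mnemonics are deliberate aliases sharing one hardware opcode, e.g. V_CMP_LG_U64 and V_CMP_NE_U64 below both encode opcode 229 (likewise the *_TRU/*_T pairs above). +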
V_CMP_LG_U64 = 229 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NE_U64 = 229 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_U64 = 230 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_TRU_U64 = 231 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_T_U64 = 231 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_F_U64 = 240 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LT_U64 = 241 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_EQ_U64 = 242 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LE_U64 = 243 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GT_U64 = 244 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_LG_U64 = 245 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_NE_U64 = 245 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_GE_U64 = 246 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_TRU_U64 = 247 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMPX_T_U64 = 247 + (u32)OpcodeMap::OP_MAP_VOPC, + // VOP2 + V_CNDMASK_B32 = 0 + (u32)OpcodeMap::OP_MAP_VOP2, + V_READLANE_B32 = 1 + (u32)OpcodeMap::OP_MAP_VOP2, + V_WRITELANE_B32 = 2 + (u32)OpcodeMap::OP_MAP_VOP2, + V_ADD_F32 = 3 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUB_F32 = 4 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUBREV_F32 = 5 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MAC_LEGACY_F32 = 6 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MUL_LEGACY_F32 = 7 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MUL_F32 = 8 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MUL_I32_I24 = 9 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MUL_HI_I32_I24 = 10 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MUL_U32_U24 = 11 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MUL_HI_U32_U24 = 12 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MIN_LEGACY_F32 = 13 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MAX_LEGACY_F32 = 14 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MIN_F32 = 15 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MAX_F32 = 16 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MIN_I32 = 17 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MAX_I32 = 18 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MIN_U32 = 19 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MAX_U32 = 20 + (u32)OpcodeMap::OP_MAP_VOP2, + V_LSHR_B32 = 21 + (u32)OpcodeMap::OP_MAP_VOP2, + V_LSHRREV_B32 = 22 + (u32)OpcodeMap::OP_MAP_VOP2, + V_ASHR_I32 = 23 + (u32)OpcodeMap::OP_MAP_VOP2, + V_ASHRREV_I32 = 24 + (u32)OpcodeMap::OP_MAP_VOP2, + V_LSHL_B32 = 25 + (u32)OpcodeMap::OP_MAP_VOP2, + V_LSHLREV_B32 = 26 + (u32)OpcodeMap::OP_MAP_VOP2, + V_AND_B32 = 27 + (u32)OpcodeMap::OP_MAP_VOP2, + V_OR_B32 = 28 + (u32)OpcodeMap::OP_MAP_VOP2, + V_XOR_B32 = 29 + (u32)OpcodeMap::OP_MAP_VOP2, + V_BFM_B32 = 30 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MAC_F32 = 31 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MADMK_F32 = 32 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MADAK_F32 = 33 + (u32)OpcodeMap::OP_MAP_VOP2, + V_BCNT_U32_B32 = 34 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MBCNT_LO_U32_B32 = 35 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MBCNT_HI_U32_B32 = 36 + (u32)OpcodeMap::OP_MAP_VOP2, + V_ADD_I32 = 37 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUB_I32 = 38 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUBREV_I32 = 39 + (u32)OpcodeMap::OP_MAP_VOP2, + V_ADDC_U32 = 40 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUBB_U32 = 41 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUBBREV_U32 = 42 + (u32)OpcodeMap::OP_MAP_VOP2, + V_LDEXP_F32 = 43 + (u32)OpcodeMap::OP_MAP_VOP2, + V_CVT_PKACCUM_U8_F32 = 44 + (u32)OpcodeMap::OP_MAP_VOP2, + V_CVT_PKNORM_I16_F32 = 45 + (u32)OpcodeMap::OP_MAP_VOP2, + V_CVT_PKNORM_U16_F32 = 46 + (u32)OpcodeMap::OP_MAP_VOP2, + V_CVT_PKRTZ_F16_F32 = 47 + (u32)OpcodeMap::OP_MAP_VOP2, + V_CVT_PK_U16_U32 = 48 + (u32)OpcodeMap::OP_MAP_VOP2, + V_CVT_PK_I16_I32 = 49 + (u32)OpcodeMap::OP_MAP_VOP2, + // VOP1 + V_NOP = 0 + (u32)OpcodeMap::OP_MAP_VOP1, + V_MOV_B32 = 1 + (u32)OpcodeMap::OP_MAP_VOP1, + V_READFIRSTLANE_B32 = 2 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_I32_F64 = 3 + 
(u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F64_I32 = 4 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_I32 = 5 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_U32 = 6 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_U32_F32 = 7 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_I32_F32 = 8 + (u32)OpcodeMap::OP_MAP_VOP1, + V_MOV_FED_B32 = 9 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F16_F32 = 10 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_F16 = 11 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_RPI_I32_F32 = 12 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_FLR_I32_F32 = 13 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_OFF_F32_I4 = 14 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_F64 = 15 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F64_F32 = 16 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_UBYTE0 = 17 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_UBYTE1 = 18 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_UBYTE2 = 19 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F32_UBYTE3 = 20 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_U32_F64 = 21 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CVT_F64_U32 = 22 + (u32)OpcodeMap::OP_MAP_VOP1, + V_TRUNC_F64 = 23 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CEIL_F64 = 24 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RNDNE_F64 = 25 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FLOOR_F64 = 26 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FRACT_F32 = 32 + (u32)OpcodeMap::OP_MAP_VOP1, + V_TRUNC_F32 = 33 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CEIL_F32 = 34 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RNDNE_F32 = 35 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FLOOR_F32 = 36 + (u32)OpcodeMap::OP_MAP_VOP1, + V_EXP_F32 = 37 + (u32)OpcodeMap::OP_MAP_VOP1, + V_LOG_CLAMP_F32 = 38 + (u32)OpcodeMap::OP_MAP_VOP1, + V_LOG_F32 = 39 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RCP_CLAMP_F32 = 40 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RCP_LEGACY_F32 = 41 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RCP_F32 = 42 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RCP_IFLAG_F32 = 43 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RSQ_CLAMP_F32 = 44 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RSQ_LEGACY_F32 = 45 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RSQ_F32 = 46 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RCP_F64 = 47 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RCP_CLAMP_F64 = 48 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RSQ_F64 = 49 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RSQ_CLAMP_F64 = 50 + (u32)OpcodeMap::OP_MAP_VOP1, + V_SQRT_F32 = 51 + (u32)OpcodeMap::OP_MAP_VOP1, + V_SQRT_F64 = 52 + (u32)OpcodeMap::OP_MAP_VOP1, + V_SIN_F32 = 53 + (u32)OpcodeMap::OP_MAP_VOP1, + V_COS_F32 = 54 + (u32)OpcodeMap::OP_MAP_VOP1, + V_NOT_B32 = 55 + (u32)OpcodeMap::OP_MAP_VOP1, + V_BFREV_B32 = 56 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FFBH_U32 = 57 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FFBL_B32 = 58 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FFBH_I32 = 59 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FREXP_EXP_I32_F64 = 60 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FREXP_MANT_F64 = 61 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FRACT_F64 = 62 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FREXP_EXP_I32_F32 = 63 + (u32)OpcodeMap::OP_MAP_VOP1, + V_FREXP_MANT_F32 = 64 + (u32)OpcodeMap::OP_MAP_VOP1, + V_CLREXCP = 65 + (u32)OpcodeMap::OP_MAP_VOP1, + V_MOVRELD_B32 = 66 + (u32)OpcodeMap::OP_MAP_VOP1, + V_MOVRELS_B32 = 67 + (u32)OpcodeMap::OP_MAP_VOP1, + V_MOVRELSD_B32 = 68 + (u32)OpcodeMap::OP_MAP_VOP1, + V_LOG_LEGACY_F32 = 69 + (u32)OpcodeMap::OP_MAP_VOP1, + V_EXP_LEGACY_F32 = 70 + (u32)OpcodeMap::OP_MAP_VOP1, + // VOP3 + V_MAD_LEGACY_F32 = 320 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAD_F32 = 321 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAD_I32_I24 = 322 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAD_U32_U24 = 323 + (u32)OpcodeMap::OP_MAP_VOP3, + V_CUBEID_F32 = 324 + (u32)OpcodeMap::OP_MAP_VOP3, + V_CUBESC_F32 = 325 + 
(u32)OpcodeMap::OP_MAP_VOP3, + V_CUBETC_F32 = 326 + (u32)OpcodeMap::OP_MAP_VOP3, + V_CUBEMA_F32 = 327 + (u32)OpcodeMap::OP_MAP_VOP3, + V_BFE_U32 = 328 + (u32)OpcodeMap::OP_MAP_VOP3, + V_BFE_I32 = 329 + (u32)OpcodeMap::OP_MAP_VOP3, + V_BFI_B32 = 330 + (u32)OpcodeMap::OP_MAP_VOP3, + V_FMA_F32 = 331 + (u32)OpcodeMap::OP_MAP_VOP3, + V_FMA_F64 = 332 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LERP_U8 = 333 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ALIGNBIT_B32 = 334 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ALIGNBYTE_B32 = 335 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MULLIT_F32 = 336 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MIN3_F32 = 337 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MIN3_I32 = 338 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MIN3_U32 = 339 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAX3_F32 = 340 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAX3_I32 = 341 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAX3_U32 = 342 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MED3_F32 = 343 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MED3_I32 = 344 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MED3_U32 = 345 + (u32)OpcodeMap::OP_MAP_VOP3, + V_SAD_U8 = 346 + (u32)OpcodeMap::OP_MAP_VOP3, + V_SAD_HI_U8 = 347 + (u32)OpcodeMap::OP_MAP_VOP3, + V_SAD_U16 = 348 + (u32)OpcodeMap::OP_MAP_VOP3, + V_SAD_U32 = 349 + (u32)OpcodeMap::OP_MAP_VOP3, + V_CVT_PK_U8_F32 = 350 + (u32)OpcodeMap::OP_MAP_VOP3, + V_DIV_FIXUP_F32 = 351 + (u32)OpcodeMap::OP_MAP_VOP3, + V_DIV_FIXUP_F64 = 352 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LSHL_B64 = 353 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LSHR_B64 = 354 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ASHR_I64 = 355 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ADD_F64 = 356 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MUL_F64 = 357 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MIN_F64 = 358 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAX_F64 = 359 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LDEXP_F64 = 360 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MUL_LO_U32 = 361 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MUL_HI_U32 = 362 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MUL_LO_I32 = 363 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MUL_HI_I32 = 364 + (u32)OpcodeMap::OP_MAP_VOP3, + V_DIV_SCALE_F32 = 365 + (u32)OpcodeMap::OP_MAP_VOP3, + V_DIV_SCALE_F64 = 366 + (u32)OpcodeMap::OP_MAP_VOP3, + V_DIV_FMAS_F32 = 367 + (u32)OpcodeMap::OP_MAP_VOP3, + V_DIV_FMAS_F64 = 368 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MSAD_U8 = 369 + (u32)OpcodeMap::OP_MAP_VOP3, + V_QSAD_U8 = 370 + (u32)OpcodeMap::OP_MAP_VOP3, + V_QSAD_PK_U16_U8 = 370 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MQSAD_U8 = 371 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MQSAD_PK_U16_U8 = 371 + (u32)OpcodeMap::OP_MAP_VOP3, + V_TRIG_PREOP_F64 = 372 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MQSAD_U32_U8 = 373 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAD_U64_U32 = 374 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAD_I64_I32 = 375 + (u32)OpcodeMap::OP_MAP_VOP3, + // VINTRP + V_INTERP_P1_F32 = 0 + (u32)OpcodeMap::OP_MAP_VINTRP, + V_INTERP_P2_F32 = 1 + (u32)OpcodeMap::OP_MAP_VINTRP, + V_INTERP_MOV_F32 = 2 + (u32)OpcodeMap::OP_MAP_VINTRP, + // SMRD + S_LOAD_DWORD = 0 + (u32)OpcodeMap::OP_MAP_SMRD, + S_LOAD_DWORDX2 = 1 + (u32)OpcodeMap::OP_MAP_SMRD, + S_LOAD_DWORDX4 = 2 + (u32)OpcodeMap::OP_MAP_SMRD, + S_LOAD_DWORDX8 = 3 + (u32)OpcodeMap::OP_MAP_SMRD, + S_LOAD_DWORDX16 = 4 + (u32)OpcodeMap::OP_MAP_SMRD, + S_BUFFER_LOAD_DWORD = 8 + (u32)OpcodeMap::OP_MAP_SMRD, + S_BUFFER_LOAD_DWORDX2 = 9 + (u32)OpcodeMap::OP_MAP_SMRD, + S_BUFFER_LOAD_DWORDX4 = 10 + (u32)OpcodeMap::OP_MAP_SMRD, + S_BUFFER_LOAD_DWORDX8 = 11 + (u32)OpcodeMap::OP_MAP_SMRD, + S_BUFFER_LOAD_DWORDX16 = 12 + (u32)OpcodeMap::OP_MAP_SMRD, + S_MEMTIME = 30 + (u32)OpcodeMap::OP_MAP_SMRD, + S_DCACHE_INV = 31 + (u32)OpcodeMap::OP_MAP_SMRD, + // DS + 
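// (local/global data share operations, categorized as InstCategory::DataShare below) +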
DS_ADD_U32 = 0 + (u32)OpcodeMap::OP_MAP_DS, + DS_SUB_U32 = 1 + (u32)OpcodeMap::OP_MAP_DS, + DS_RSUB_U32 = 2 + (u32)OpcodeMap::OP_MAP_DS, + DS_INC_U32 = 3 + (u32)OpcodeMap::OP_MAP_DS, + DS_DEC_U32 = 4 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_I32 = 5 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_I32 = 6 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_U32 = 7 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_U32 = 8 + (u32)OpcodeMap::OP_MAP_DS, + DS_AND_B32 = 9 + (u32)OpcodeMap::OP_MAP_DS, + DS_OR_B32 = 10 + (u32)OpcodeMap::OP_MAP_DS, + DS_XOR_B32 = 11 + (u32)OpcodeMap::OP_MAP_DS, + DS_MSKOR_B32 = 12 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_B32 = 13 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE2_B32 = 14 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE2ST64_B32 = 15 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_B32 = 16 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_F32 = 17 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_F32 = 18 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_F32 = 19 + (u32)OpcodeMap::OP_MAP_DS, + DS_NOP = 20 + (u32)OpcodeMap::OP_MAP_DS, + DS_GWS_SEMA_RELEASE_ALL = 24 + (u32)OpcodeMap::OP_MAP_DS, + DS_GWS_INIT = 25 + (u32)OpcodeMap::OP_MAP_DS, + DS_GWS_SEMA_V = 26 + (u32)OpcodeMap::OP_MAP_DS, + DS_GWS_SEMA_BR = 27 + (u32)OpcodeMap::OP_MAP_DS, + DS_GWS_SEMA_P = 28 + (u32)OpcodeMap::OP_MAP_DS, + DS_GWS_BARRIER = 29 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_B8 = 30 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_B16 = 31 + (u32)OpcodeMap::OP_MAP_DS, + DS_ADD_RTN_U32 = 32 + (u32)OpcodeMap::OP_MAP_DS, + DS_SUB_RTN_U32 = 33 + (u32)OpcodeMap::OP_MAP_DS, + DS_RSUB_RTN_U32 = 34 + (u32)OpcodeMap::OP_MAP_DS, + DS_INC_RTN_U32 = 35 + (u32)OpcodeMap::OP_MAP_DS, + DS_DEC_RTN_U32 = 36 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_RTN_I32 = 37 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_RTN_I32 = 38 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_RTN_U32 = 39 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_RTN_U32 = 40 + (u32)OpcodeMap::OP_MAP_DS, + DS_AND_RTN_B32 = 41 + (u32)OpcodeMap::OP_MAP_DS, + DS_OR_RTN_B32 = 42 + (u32)OpcodeMap::OP_MAP_DS, + DS_XOR_RTN_B32 = 43 + (u32)OpcodeMap::OP_MAP_DS, + DS_MSKOR_RTN_B32 = 44 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRXCHG_RTN_B32 = 45 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRXCHG2_RTN_B32 = 46 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRXCHG2ST64_RTN_B32 = 47 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_RTN_B32 = 48 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_RTN_F32 = 49 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_RTN_F32 = 50 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_RTN_F32 = 51 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRAP_RTN_B32 = 52 + (u32)OpcodeMap::OP_MAP_DS, + DS_SWIZZLE_B32 = 53 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_B32 = 54 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ2_B32 = 55 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ2ST64_B32 = 56 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_I8 = 57 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_U8 = 58 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_I16 = 59 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_U16 = 60 + (u32)OpcodeMap::OP_MAP_DS, + DS_CONSUME = 61 + (u32)OpcodeMap::OP_MAP_DS, + DS_APPEND = 62 + (u32)OpcodeMap::OP_MAP_DS, + DS_ORDERED_COUNT = 63 + (u32)OpcodeMap::OP_MAP_DS, + DS_ADD_U64 = 64 + (u32)OpcodeMap::OP_MAP_DS, + DS_SUB_U64 = 65 + (u32)OpcodeMap::OP_MAP_DS, + DS_RSUB_U64 = 66 + (u32)OpcodeMap::OP_MAP_DS, + DS_INC_U64 = 67 + (u32)OpcodeMap::OP_MAP_DS, + DS_DEC_U64 = 68 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_I64 = 69 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_I64 = 70 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_U64 = 71 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_U64 = 72 + (u32)OpcodeMap::OP_MAP_DS, + DS_AND_B64 = 73 + (u32)OpcodeMap::OP_MAP_DS, + DS_OR_B64 = 74 + (u32)OpcodeMap::OP_MAP_DS, + 
DS_XOR_B64 = 75 + (u32)OpcodeMap::OP_MAP_DS, + DS_MSKOR_B64 = 76 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_B64 = 77 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE2_B64 = 78 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE2ST64_B64 = 79 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_B64 = 80 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_F64 = 81 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_F64 = 82 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_F64 = 83 + (u32)OpcodeMap::OP_MAP_DS, + DS_ADD_RTN_U64 = 96 + (u32)OpcodeMap::OP_MAP_DS, + DS_SUB_RTN_U64 = 97 + (u32)OpcodeMap::OP_MAP_DS, + DS_RSUB_RTN_U64 = 98 + (u32)OpcodeMap::OP_MAP_DS, + DS_INC_RTN_U64 = 99 + (u32)OpcodeMap::OP_MAP_DS, + DS_DEC_RTN_U64 = 100 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_RTN_I64 = 101 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_RTN_I64 = 102 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_RTN_U64 = 103 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_RTN_U64 = 104 + (u32)OpcodeMap::OP_MAP_DS, + DS_AND_RTN_B64 = 105 + (u32)OpcodeMap::OP_MAP_DS, + DS_OR_RTN_B64 = 106 + (u32)OpcodeMap::OP_MAP_DS, + DS_XOR_RTN_B64 = 107 + (u32)OpcodeMap::OP_MAP_DS, + DS_MSKOR_RTN_B64 = 108 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRXCHG_RTN_B64 = 109 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRXCHG2_RTN_B64 = 110 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRXCHG2ST64_RTN_B64 = 111 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_RTN_B64 = 112 + (u32)OpcodeMap::OP_MAP_DS, + DS_CMPST_RTN_F64 = 113 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_RTN_F64 = 114 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_RTN_F64 = 115 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_B64 = 118 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ2_B64 = 119 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ2ST64_B64 = 120 + (u32)OpcodeMap::OP_MAP_DS, + DS_CONDXCHG32_RTN_B64 = 126 + (u32)OpcodeMap::OP_MAP_DS, + DS_ADD_SRC2_U32 = 128 + (u32)OpcodeMap::OP_MAP_DS, + DS_SUB_SRC2_U32 = 129 + (u32)OpcodeMap::OP_MAP_DS, + DS_RSUB_SRC2_U32 = 130 + (u32)OpcodeMap::OP_MAP_DS, + DS_INC_SRC2_U32 = 131 + (u32)OpcodeMap::OP_MAP_DS, + DS_DEC_SRC2_U32 = 132 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_SRC2_I32 = 133 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_SRC2_I32 = 134 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_SRC2_U32 = 135 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_SRC2_U32 = 136 + (u32)OpcodeMap::OP_MAP_DS, + DS_AND_SRC2_B32 = 137 + (u32)OpcodeMap::OP_MAP_DS, + DS_OR_SRC2_B32 = 138 + (u32)OpcodeMap::OP_MAP_DS, + DS_XOR_SRC2_B32 = 139 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_SRC2_B32 = 141 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_SRC2_F32 = 146 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_SRC2_F32 = 147 + (u32)OpcodeMap::OP_MAP_DS, + DS_ADD_SRC2_U64 = 192 + (u32)OpcodeMap::OP_MAP_DS, + DS_SUB_SRC2_U64 = 193 + (u32)OpcodeMap::OP_MAP_DS, + DS_RSUB_SRC2_U64 = 194 + (u32)OpcodeMap::OP_MAP_DS, + DS_INC_SRC2_U64 = 195 + (u32)OpcodeMap::OP_MAP_DS, + DS_DEC_SRC2_U64 = 196 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_SRC2_I64 = 197 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_SRC2_I64 = 198 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_SRC2_U64 = 199 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_SRC2_U64 = 200 + (u32)OpcodeMap::OP_MAP_DS, + DS_AND_SRC2_B64 = 201 + (u32)OpcodeMap::OP_MAP_DS, + DS_OR_SRC2_B64 = 202 + (u32)OpcodeMap::OP_MAP_DS, + DS_XOR_SRC2_B64 = 203 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_SRC2_B64 = 205 + (u32)OpcodeMap::OP_MAP_DS, + DS_MIN_SRC2_F64 = 210 + (u32)OpcodeMap::OP_MAP_DS, + DS_MAX_SRC2_F64 = 211 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_B96 = 222 + (u32)OpcodeMap::OP_MAP_DS, + DS_WRITE_B128 = 223 + (u32)OpcodeMap::OP_MAP_DS, + DS_CONDXCHG32_RTN_B128 = 253 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_B96 = 254 + (u32)OpcodeMap::OP_MAP_DS, + DS_READ_B128 = 255 + 
(u32)OpcodeMap::OP_MAP_DS, + // MUBUF + BUFFER_LOAD_FORMAT_X = 0 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_FORMAT_XY = 1 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_FORMAT_XYZ = 2 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_FORMAT_XYZW = 3 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_FORMAT_X = 4 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_FORMAT_XY = 5 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_FORMAT_XYZ = 6 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_FORMAT_XYZW = 7 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_UBYTE = 8 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_SBYTE = 9 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_USHORT = 10 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_SSHORT = 11 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_DWORD = 12 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_DWORDX2 = 13 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_DWORDX4 = 14 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_LOAD_DWORDX3 = 15 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_BYTE = 24 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_SHORT = 26 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_DWORD = 28 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_DWORDX2 = 29 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_DWORDX4 = 30 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_STORE_DWORDX3 = 31 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SWAP = 48 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_CMPSWAP = 49 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_ADD = 50 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SUB = 51 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SMIN = 53 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_UMIN = 54 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SMAX = 55 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_UMAX = 56 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_AND = 57 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_OR = 58 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_XOR = 59 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_INC = 60 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_DEC = 61 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_FCMPSWAP = 62 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_FMIN = 63 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_FMAX = 64 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SWAP_X2 = 80 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_CMPSWAP_X2 = 81 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_ADD_X2 = 82 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SUB_X2 = 83 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SMIN_X2 = 85 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_UMIN_X2 = 86 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_SMAX_X2 = 87 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_UMAX_X2 = 88 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_AND_X2 = 89 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_OR_X2 = 90 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_XOR_X2 = 91 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_INC_X2 = 92 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_DEC_X2 = 93 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_FCMPSWAP_X2 = 94 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_FMIN_X2 = 95 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_ATOMIC_FMAX_X2 = 96 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_WBINVL1_SC = 112 + (u32)OpcodeMap::OP_MAP_MUBUF, + BUFFER_WBINVL1 = 113 + (u32)OpcodeMap::OP_MAP_MUBUF, + // MTBUF + TBUFFER_LOAD_FORMAT_X = 0 + (u32)OpcodeMap::OP_MAP_MTBUF, + TBUFFER_LOAD_FORMAT_XY = 1 + (u32)OpcodeMap::OP_MAP_MTBUF, + TBUFFER_LOAD_FORMAT_XYZ = 2 + (u32)OpcodeMap::OP_MAP_MTBUF, + 
TBUFFER_LOAD_FORMAT_XYZW = 3 + (u32)OpcodeMap::OP_MAP_MTBUF, + TBUFFER_STORE_FORMAT_X = 4 + (u32)OpcodeMap::OP_MAP_MTBUF, + TBUFFER_STORE_FORMAT_XY = 5 + (u32)OpcodeMap::OP_MAP_MTBUF, + TBUFFER_STORE_FORMAT_XYZ = 6 + (u32)OpcodeMap::OP_MAP_MTBUF, + TBUFFER_STORE_FORMAT_XYZW = 7 + (u32)OpcodeMap::OP_MAP_MTBUF, + // MIMG + IMAGE_LOAD = 0 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_LOAD_MIP = 1 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_LOAD_PCK = 2 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_LOAD_PCK_SGN = 3 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_LOAD_MIP_PCK = 4 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_LOAD_MIP_PCK_SGN = 5 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_STORE = 8 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_STORE_MIP = 9 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_STORE_PCK = 10 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_STORE_MIP_PCK = 11 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GET_RESINFO = 14 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_SWAP = 15 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_CMPSWAP = 16 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_ADD = 17 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_SUB = 18 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_SMIN = 20 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_UMIN = 21 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_SMAX = 22 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_UMAX = 23 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_AND = 24 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_OR = 25 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_XOR = 26 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_INC = 27 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_DEC = 28 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_FCMPSWAP = 29 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_FMIN = 30 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_ATOMIC_FMAX = 31 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE = 32 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_CL = 33 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_D = 34 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_D_CL = 35 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_L = 36 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_B = 37 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_B_CL = 38 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_LZ = 39 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C = 40 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_CL = 41 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_D = 42 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_D_CL = 43 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_L = 44 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_B = 45 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_B_CL = 46 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_LZ = 47 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_O = 48 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_CL_O = 49 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_D_O = 50 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_D_CL_O = 51 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_L_O = 52 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_B_O = 53 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_B_CL_O = 54 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_LZ_O = 55 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_O = 56 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_CL_O = 57 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_D_O = 58 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_D_CL_O = 59 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_L_O = 60 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_B_O = 61 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_B_CL_O = 62 + (u32)OpcodeMap::OP_MAP_MIMG, + 
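// Sample/gather suffixes correspond to the MimgModifier flags declared later in this header: _L = Lod, _B = LodBias, _CL = LodClamp, _D = Derivative, _CD = CoarseDerivative, _LZ = Level0, _C = Pcf (depth compare), _O = Offset. +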
IMAGE_SAMPLE_C_LZ_O = 63 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4 = 64 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_CL = 65 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_L = 68 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_B = 69 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_B_CL = 70 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_LZ = 71 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C = 72 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_CL = 73 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_L = 76 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_B = 77 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_B_CL = 78 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_LZ = 79 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_O = 80 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_CL_O = 81 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_L_O = 84 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_B_O = 85 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_B_CL_O = 86 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_LZ_O = 87 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_O = 88 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_CL_O = 89 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_L_O = 92 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_B_O = 93 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_B_CL_O = 94 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GATHER4_C_LZ_O = 95 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_GET_LOD = 96 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_CD = 104 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_CD_CL = 105 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_CD = 106 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_CD_CL = 107 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_CD_O = 108 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_CD_CL_O = 109 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_CD_O = 110 + (u32)OpcodeMap::OP_MAP_MIMG, + IMAGE_SAMPLE_C_CD_CL_O = 111 + (u32)OpcodeMap::OP_MAP_MIMG, + // EXP + EXP = 0 + (u32)OpcodeMap::OP_MAP_EXP, +}; + +enum class EncodingMask : u32 { + MASK_9bit = 0x000001FFULL << 23, + MASK_7bit = 0x0000007FULL << 25, + MASK_6bit = 0x0000003FULL << 26, + MASK_5bit = 0x0000001FULL << 27, + MASK_4bit = 0x0000000FULL << 28, + MASK_2bit = 0x00000003ULL << 30, + MASK_1bit = 0x00000001ULL << 31 +}; + +enum class InstEncoding : u32 { + /// InstructionEncodingMask_9bit + /// bits [31:23] - (1 0 1 1 1 1 1 0 1) + SOP1 = 0x0000017Du << 23, + /// bits [31:23] - (1 0 1 1 1 1 1 1 1) + SOPP = 0x0000017Fu << 23, + /// bits [31:23] - (1 0 1 1 1 1 1 1 0) + SOPC = 0x0000017Eu << 23, + /// InstructionEncodingMask_7bit + /// bits [31:25] - (0 1 1 1 1 1 1) + VOP1 = 0x0000003Fu << 25, + /// bits [31:25] - (0 1 1 1 1 1 0) + VOPC = 0x0000003Eu << 25, + /// InstructionEncodingMask_6bit + /// bits [31:26] - (1 1 0 1 0 0) + VOP3 = 0x00000034u << 26, + /// bits [31:26] - (1 1 1 1 1 0) + EXP = 0x0000003Eu << 26, + /// bits [31:26] - (1 1 0 0 1 0) + VINTRP = 0x00000032u << 26, + /// bits [31:26] - (1 1 0 1 1 0) + DS = 0x00000036u << 26, + /// bits [31:26] - (1 1 1 0 0 0) + MUBUF = 0x00000038u << 26, + /// bits [31:26] - (1 1 1 0 1 0) + MTBUF = 0x0000003Au << 26, + /// bits [31:26] - (1 1 1 1 0 0) + MIMG = 0x0000003Cu << 26, + /// InstructionEncodingMask_5bit + /// bits [31:27] - (1 1 0 0 0) + SMRD = 0x00000018u << 27, + /// InstructionEncodingMask_4bit + /// bits [31:28] - (1 0 1 1) + SOPK = 0x0000000Bu << 28, + /// InstructionEncodingMask_2bit + /// bits [31:30] - (1 0) + SOP2 = 0x00000002u << 30, + /// InstructionEncodingMask_1bit + /// bits [31:31] - (0) + VOP2 = 0x00000000u << 31, 
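+ // Worked example, assuming a raw instruction dword `inst`: every SOP1 dword also + // matches the 2-bit SOP2 pattern (1 0 in bits [31:30]), so a decoder must test the + // wider masks first, e.g. (inst & (u32)EncodingMask::MASK_9bit) == (u32)InstEncoding::SOP1 + // before (inst & (u32)EncodingMask::MASK_2bit) == (u32)InstEncoding::SOP2.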
+ + ILLEGAL +}; + +enum class InstClass : u32 { + Undefined, + + ScalarArith, + ScalarAbs, + ScalarMov, + ScalarMovRel, + ScalarCmp, + ScalarSelect, + ScalarBitLogic, + ScalarBitManip, + ScalarBitField, + ScalarConv, + ScalarExecMask, + ScalarQuadMask, + + VectorRegMov, + VectorMovRel, + VectorLane, + VectorBitLogic, + VectorBitField32, + VectorThreadMask, + VectorBitField64, + VectorFpArith32, + VectorFpRound32, + VectorFpField32, + VectorFpTran32, + VectorFpCmp32, + VectorFpArith64, + VectorFpRound64, + VectorFpField64, + VectorFpTran64, + VectorFpCmp64, + VectorIntArith32, + VectorIntArith64, + VectorIntCmp32, + VectorIntCmp64, + VectorConv, + VectorFpGraph32, + VectorIntGraph, + VectorMisc, + + ScalarProgFlow, + ScalarSync, + ScalarWait, + ScalarCache, + ScalarPrior, + ScalarRegAccess, + ScalarMsg, + + ScalarMemRd, + ScalarMemUt, + + VectorMemBufNoFmt, + VectorMemBufFmt, + VectorMemBufAtomic, + VectorMemImgNoSmp, + VectorMemImgSmp, + VectorMemImgUt, + VectorMemL1Cache, + + DsIdxRd, + DsIdxWr, + DsIdxWrXchg, + DsIdxCondXchg, + DsIdxWrap, + DsAtomicArith32, + DsAtomicArith64, + DsAtomicMinMax32, + DsAtomicMinMax64, + DsAtomicCmpSt32, + DsAtomicCmpSt64, + DsAtomicLogic32, + DsAtomicLogic64, + DsAppendCon, + DsDataShareUt, + DsDataShareMisc, + GdsSync, + GdsOrdCnt, + + VectorInterpFpCache, + + Exp, + + DbgProf +}; + +enum class InstCategory : u32 { + Undefined, + /// Scalar ALU Operation + ScalarALU, + /// Scalar Instruction Memory + ScalarMemory, + /// Vector ALU Operation + VectorALU, + /// Vector Instruction Memory Read + VectorMemory, + /// Scalar Program Flow Control and Special Operations + FlowControl, + /// LDS and GDS + DataShare, + /// Vector Interpolation Operations + VectorInterpolation, + /// Export + Export, + /// Debug and Profiling Operations + DebugProfile, +}; + +enum class ScalarType : u32 { + Undefined, + Any, + Uint32, + Uint64, + Sint32, + Sint64, + Float16, + Float32, + Float64, + Bool, +}; + +/// Instruction's action when GPU thread is not active. 
+enum class DivergentAction : u32 { Nop, ZeroScalar, Execute }; + +enum class OperandField : u32 { + ScalarGPR = 0, + VccLo = 106, + VccHi, + M0 = 124, + ExecLo = 126, + ExecHi, + ConstZero, + SignedConstIntPos = 129, + SignedConstIntNeg = 193, + ConstFloatPos_0_5 = 240, + ConstFloatNeg_0_5, + ConstFloatPos_1_0, + ConstFloatNeg_1_0, + ConstFloatPos_2_0, + ConstFloatNeg_2_0, + ConstFloatPos_4_0, + ConstFloatNeg_4_0, + VccZ = 251, + ExecZ, + Scc, + LdsDirect, + LiteralConst, + VectorGPR, + + Undefined = 0xFFFFFFFF, +}; + +enum class SystemValue : u32 { + None = 0, + Position = 1, + ClipDistance = 2, + CullDistance = 3, + RenderTargetId = 4, + ViewportId = 5, + VertexId = 6, + PrimitiveId = 7, + InstanceId = 8, + IsFrontFace = 9, + SampleIndex = 10, + FinalQuadUeq0EdgeTessFactor = 11, + FinalQuadVeq0EdgeTessFactor = 12, + FinalQuadUeq1EdgeTessFactor = 13, + FinalQuadVeq1EdgeTessFactor = 14, + FinalQuadUInsideTessFactor = 15, + FinalQuadVInsideTessFactor = 16, + FinalTriUeq0EdgeTessFactor = 17, + FinalTriVeq0EdgeTessFactor = 18, + FinalTriWeq0EdgeTessFactor = 19, + FinalTriInsideTessFactor = 20, + FinalLineDetailTessFactor = 21, + FinalLineDensityTessFactor = 22, + GlobalInvocationId = 23, + WorkgroupId = 24, + LocalInvocationId = 25, + LocalInvocationIndex = 26, + Target = 64, + Depth = 65, + Coverage = 66, + DepthGe = 67, + DepthLe = 68, + NumSubgroups = 70, + SubgroupID = 71, + SubgroupSize = 72, + SubgroupInvocationID = 73, + SubgroupEqMask = 74, + SubgroupGeMask = 75, + SubgroupGtMask = 76, + SubgroupLeMask = 77, + SubgroupLtMask = 78, +}; + +enum class InterpolationMode : u32 { + Undefined = 0, + Constant = 1, + Linear = 2, + LinearCentroid = 3, + LinearNoPerspective = 4, + LinearNoPerspectiveCentroid = 5, + LinearSample = 6, + LinearNoPerspectiveSample = 7, +}; + +enum class MimgModifier : u32 { + Lod, /// LOD is used instead of TA computed LOD. + LodBias, /// Add this BIAS to the LOD TA computes. + LodClamp, /// Clamp the LOD to be no larger than this value. + Derivative, /// Send dx/dv, dx/dy, etc. slopes to TA for it to use in LOD computation. + CoarseDerivative, /// Send dx/dv, dx/dy, etc. slopes to TA for it to use in LOD computation. + Level0, /// Force use of MIP level 0. + Pcf, /// Percentage closer filtering. + Offset /// Send X, Y, Z integer offsets (packed into 1 Dword) to offset XYZ address.
+}; +using MimgModifierFlags = Common::Flags<MimgModifier>; + +enum class ImageResComponent : u32 { Width, Height, Depth, MipCount }; +using ImageResFlags = Common::Flags<ImageResComponent>; + +enum class ImageAddrComponent : u32 { + Offsets, + Bias, + Zpcf, + DxDh, + DyDh, + DzDh, + DxDv, + DyDv, + DzDv, + X, + Y, + Z, + Slice, + FaceId, + FragId, + Lod, + Clamp, +}; + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp new file mode 100644 index 00000000..f593529d --- /dev/null +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -0,0 +1,829 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <algorithm> +#include <memory> +#include <optional> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> +#include <boost/intrusive/list.hpp> +#include <fmt/format.h> +#include "shader_recompiler/frontend/structured_control_flow.h" +#include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/ir/ir_emitter.h" + +namespace Shader::Gcn { + +namespace { + +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; + +using Tree = boost::intrusive::list<Statement, boost::intrusive::base_hook<ListBaseHook>, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size<false>>; +using Node = Tree::iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Kill, + Unreachable, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct Kill {}; +struct Unreachable {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +struct Statement : ListBaseHook { + Statement(const Block* block_, Statement* up_) + : block{block_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} + Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} + Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, IR::Condition cond_, Statement* up_) + : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) + : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + :
op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_, Statement* up_) + : id{id_}, up{up_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + const Block* block; + Node label; + Tree children; + IR::Condition guest_cond; + Statement* op; + Statement* op_a; + u32 location; + s32 branch_offset; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default: + return ""; + } +} + +[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, + stmt->block->begin, stmt->block->end, + reinterpret_cast<uintptr_t>(stmt->block)); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::Kill: + ret += fmt::format("{} kill;\n", indent); + break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max =
label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { + Node it{goto_stmt}; + do { + if (it == label_stmt) { + return true; + } + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + if (it == label_stmt) { + return true; + } + ++it; + } + return false; +} + +Node SiblingFromNephew(Node uncle, Node nephew) noexcept { + Statement* const parent{uncle->up}; + Statement* it{&*nephew}; + while (it->up != parent) { + it = it->up; + } + return Tree::s_iterator_to(*it); +} + +bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { + const Node end{right_sibling->up->children.end()}; + for (auto it = right_sibling; it != end; ++it) { + if (it == left_sibling) { + return false; + } + } + return true; +} + +bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { + const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; + return AreOrdered(sibling, goto_stmt); +} + +/** + * The algorithm used here is from: + * Taming Control Flow: A Structured Approach to Eliminating Goto Statements. + * Ana M. Erosa and Laurie J. Hendren + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.42.1485&rep=rep1&type=pdf + */ +class GotoPass { +public: + explicit GotoPass(CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { + std::vector<Node> gotos{BuildTree(cfg)}; + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); + } + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (NeedsLift(goto_stmt, label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // Expensive operation: + if (!AreSiblings(goto_stmt, label_stmt)) { + throw LogicError("Goto is not a sibling with the label"); + } + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (AreOrdered(goto_stmt, label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { +
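// (backward branch: the label precedes the goto, so the region between them becomes a do/while) +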
// Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector<Node> BuildTree(CFG& cfg) { + u32 label_id{0}; + std::vector<Node> gotos; + BuildTree(cfg, label_id, gotos, root_stmt.children.end(), std::nullopt); + return gotos; + } + + void BuildTree(CFG& cfg, u32& label_id, std::vector<Node>& gotos, Node function_insert_point, + std::optional<u32> return_label) { + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition::False, &root_stmt)}; + Tree& root{root_stmt.children}; + std::unordered_map<Block*, Node> local_labels; + local_labels.reserve(cfg.blocks.size()); + + for (Block& block : cfg.blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + const Node label_it{root.insert(function_insert_point, *label)}; + local_labels.emplace(&block, label_it); + ++label_id; + } + for (Block& block : cfg.blocks) { + const Node label{local_labels.at(&block)}; + // Insertion point + const Node ip{std::next(label)}; + + // Reset goto variables before the first block and after its respective label + const auto make_reset_variable{[&]() -> Statement& { + return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); + }}; + root.push_front(make_reset_variable()); + root.insert(ip, make_reset_variable()); + root.insert(ip, *pool.Create(&block, &root_stmt)); + + switch (block.end_class) { + case EndClass::Branch: { + Statement* const always_cond{ + pool.Create(Identity{}, IR::Condition::True, &root_stmt)}; + if (block.cond == IR::Condition::True) { + const Node true_label{local_labels.at(block.branch_true)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); + } else if (block.cond == IR::Condition::False) { + const Node false_label{local_labels.at(block.branch_false)}; + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } else { + const Node true_label{local_labels.at(block.branch_true)}; + const Node false_label{local_labels.at(block.branch_false)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } + break; + } + case EndClass::Exit: + root.insert(ip, *pool.Create(Return{}, &root_stmt)); + break; + // case EndClass::Kill: + // root.insert(ip, *pool.Create(Kill{}, &root_stmt)); + // break; + } + } + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case
StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + switch (label_nested_stmt->type) { + case StatementType::If: + // Update nested if condition + label_nested_stmt->cond = + pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + body.insert(goto_stmt, *loop_stmt); + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& 
parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + ObjectPool& pool; + Statement root_stmt{FunctionTag{}}; +}; + +[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { + Tree& tree{stmt.up->children}; + const Node end{tree.end()}; + Node forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return &*forward_node; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", u32(stmt.type)); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, + ObjectPool& stmt_pool_, Statement& root_stmt, + IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, + Stage stage_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, + syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} { + Visit(root_stmt, nullptr, nullptr); + + IR::Block& first_block{*syntax_list.front().data.block}; + IR::IREmitter ir(first_block, first_block.begin()); + ir.Prologue(); + } + +private: + void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { + IR::Block* current_block{}; + const auto ensure_block{[&] { + if (current_block) { + return; + } + current_block = block_pool.Create(inst_pool); + auto& node{syntax_list.emplace_back()}; + node.type = IR::AbstractSyntaxNode::Type::Block; + node.data.block = current_block; + }}; + Tree& tree{parent.children}; + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + ensure_block(); + const u32 start = stmt.block->begin_index; + const u32 size = stmt.block->end_index - start + 1; + Translate(current_block, stage, inst_list.subspan(start, size)); + fmt::print("{}\n", IR::DumpBlock(*current_block)); + break; + } + case StatementType::SetVariable: { + ensure_block(); + IR::IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + ensure_block(); + 
IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Implement if header block + IR::IREmitter ir{*current_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + + const size_t if_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t then_block_index{syntax_list.size()}; + Visit(stmt, break_block, merge_block); + + IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; + current_block->AddBranch(then_block); + current_block->AddBranch(merge_block); + current_block = merge_block; + + auto& if_node{syntax_list[if_node_index]}; + if_node.type = IR::AbstractSyntaxNode::Type::If; + if_node.data.if_node.cond = cond; + if_node.data.if_node.body = then_block; + if_node.data.if_node.merge = merge_block; + + auto& endif_node{syntax_list.emplace_back()}; + endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; + endif_node.data.end_if.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = merge_block; + break; + } + case StatementType::Loop: { + IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + current_block->AddBranch(loop_header_block); + } + auto& header_node{syntax_list.emplace_back()}; + header_node.type = IR::AbstractSyntaxNode::Type::Block; + header_node.data.block = loop_header_block; + + IR::Block* const continue_block{block_pool.Create(inst_pool)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + const size_t loop_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t body_block_index{syntax_list.size()}; + Visit(stmt, merge_block, continue_block); + + // The continue block is located at the end of the loop + IR::IREmitter ir{*continue_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + + IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; + loop_header_block->AddBranch(body_block); + + continue_block->AddBranch(loop_header_block); + continue_block->AddBranch(merge_block); + + current_block = merge_block; + + auto& loop{syntax_list[loop_node_index]}; + loop.type = IR::AbstractSyntaxNode::Type::Loop; + loop.data.loop.body = body_block; + loop.data.loop.continue_block = continue_block; + loop.data.loop.merge = merge_block; + + auto& continue_block_node{syntax_list.emplace_back()}; + continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; + continue_block_node.data.block = continue_block; + + auto& repeat{syntax_list.emplace_back()}; + repeat.type = IR::AbstractSyntaxNode::Type::Repeat; + repeat.data.repeat.cond = cond; + repeat.data.repeat.loop_header = loop_header_block; + repeat.data.repeat.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = merge_block; + break; + } + case StatementType::Break: { + ensure_block(); + IR::Block* const skip_block{MergeBlock(parent, stmt)}; + + IR::IREmitter ir{*current_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + current_block->AddBranch(break_block); + current_block->AddBranch(skip_block); + current_block = skip_block; + + auto& break_node{syntax_list.emplace_back()}; + break_node.type = IR::AbstractSyntaxNode::Type::Break; + break_node.data.break_node.cond = cond; + break_node.data.break_node.merge = break_block; + break_node.data.break_node.skip = skip_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = 
IR::AbstractSyntaxNode::Type::Block; + merge.data.block = skip_block; + break; + } + case StatementType::Return: { + ensure_block(); + IR::Block* return_block{block_pool.Create(inst_pool)}; + IR::IREmitter{*return_block}.Epilogue(); + current_block->AddBranch(return_block); + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = return_block; + + current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; + break; + } + case StatementType::Kill: { + ensure_block(); + IR::Block* demote_block{MergeBlock(parent, stmt)}; + // IR::IREmitter{*current_block}.DemoteToHelperInvocation(); + current_block->AddBranch(demote_block); + current_block = demote_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = demote_block; + break; + } + case StatementType::Unreachable: { + ensure_block(); + current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; + break; + } + default: + throw NotImplementedException("Statement type {}", u32(stmt.type)); + } + } + if (current_block) { + if (fallthrough_block) { + current_block->AddBranch(fallthrough_block); + } else { + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; + } + } + } + + IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + Statement* merge_stmt{TryFindForwardBlock(stmt)}; + if (!merge_stmt) { + // Create a merge block we can visit later + merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + } + return block_pool.Create(inst_pool); + } + + ObjectPool& stmt_pool; + ObjectPool& inst_pool; + ObjectPool& block_pool; + IR::AbstractSyntaxList& syntax_list; + const Block dummy_flow_block{}; + std::span inst_list; + Stage stage; +}; +} // Anonymous namespace + +IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, + CFG& cfg, Stage stage) { + ObjectPool stmt_pool{64}; + GotoPass goto_pass{cfg, stmt_pool}; + Statement& root{goto_pass.RootStatement()}; + IR::AbstractSyntaxList syntax_list; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage}; + return syntax_list; +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h new file mode 100644 index 00000000..fa7b6738 --- /dev/null +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/frontend/control_flow_graph.h" +#include "shader_recompiler/ir/abstract_syntax_list.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/value.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader { +enum class Stage : u32; +} + +namespace Shader::Gcn { + +[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, + ObjectPool& block_pool, CFG& cfg, + Stage stage); + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp new file mode 100644 index 00000000..9868ecd5 --- /dev/null +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 
Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) { + const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; + const IR::VectorReg dst_reg{inst.dst[0].code}; + if (is_pair) { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + ir.SetVectorReg(dst_reg, ir.ReadShared(32, is_signed, addr0)); + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); + ir.SetVectorReg(dst_reg + 1, ir.ReadShared(32, is_signed, addr1)); + } else if (bit_size == 64) { + const IR::Value data = ir.UnpackUint2x32(ir.ReadShared(bit_size, is_signed, addr)); + ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); + ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)}); + } else { + const IR::U32 data = ir.ReadShared(bit_size, is_signed, addr); + ir.SetVectorReg(dst_reg, data); + } +} + +void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) { + const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; + const IR::VectorReg data0{inst.src[1].code}; + const IR::VectorReg data1{inst.src[2].code}; + if (is_pair) { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + ir.WriteShared(32, ir.GetVectorReg(data0), addr0); + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); + ir.WriteShared(32, ir.GetVectorReg(data1), addr1); + } else if (bit_size == 64) { + const IR::U64 data = ir.PackUint2x32( + ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1))); + ir.WriteShared(bit_size, data, addr); + } else { + ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr); + } +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp new file mode 100644 index 00000000..74aac4fb --- /dev/null +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Translator::EXP(const GcnInst& inst) { + const auto& exp = inst.control.exp; + const IR::Attribute attrib{exp.target}; + const std::array vsrc = { + IR::VectorReg(inst.src[0].code), + IR::VectorReg(inst.src[1].code), + IR::VectorReg(inst.src[2].code), + IR::VectorReg(inst.src[3].code), + }; + + const auto unpack = [&](u32 idx) { + const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); + const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; + const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; + ir.SetAttribute(attrib, r, idx * 2); + ir.SetAttribute(attrib, g, idx * 2 + 1); + }; + + // Components are float16 packed into a VGPR + if (exp.compr) { + // Export R, G + if (exp.en & 1) { + unpack(0); + } + // Export B, A + if ((exp.en >> 2) & 1) { + unpack(1); + } + } else { + // Components are float32 into separate VGPRS + u32 mask = exp.en; + for (u32 i = 0; i < 4; i++, mask >>= 1) { + if ((mask & 1) == 0) { + continue; + } + const IR::F32 comp = ir.GetVectorReg(vsrc[i]); + ir.SetAttribute(attrib, comp, i); + } + } +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/flat_memory.cpp 
b/src/shader_recompiler/frontend/translate/flat_memory.cpp new file mode 100644 index 00000000..e69de29b diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp new file mode 100644 index 00000000..c920f936 --- /dev/null +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Translator::S_MOV(const GcnInst& inst) { + SetDst(inst.dst[0], GetSrc(inst.src[0])); +} + +void Translator::S_MUL_I32(const GcnInst& inst) { + SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); +} + +void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) { + const IR::U32 lhs = GetSrc(inst.src[0]); + const IR::U32 rhs = GetSrc(inst.src[1]); + const IR::U1 result = [&] { + switch (cond) { + case ConditionOp::EQ: + return ir.IEqual(lhs, rhs); + case ConditionOp::LG: + return ir.INotEqual(lhs, rhs); + case ConditionOp::GT: + return ir.IGreaterThan(lhs, rhs, is_signed); + case ConditionOp::GE: + return ir.IGreaterThanEqual(lhs, rhs, is_signed); + case ConditionOp::LT: + return ir.ILessThan(lhs, rhs, is_signed); + case ConditionOp::LE: + return ir.ILessThanEqual(lhs, rhs, is_signed); + } + }(); + // ir.SetScc(result); +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp new file mode 100644 index 00000000..dc02dfd2 --- /dev/null +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg, + const IR::U32U64& address) { + for (u32 i = 0; i < num_dwords; i++) { + const IR::U32 value = handle.IsEmpty() ? ir.ReadConst(address, ir.Imm32(i)) + : ir.ReadConstBuffer(handle, address, ir.Imm32(i)); + ir.SetScalarReg(dst_reg++, value); + } +} + +void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { + const auto& smrd = inst.control.smrd; + const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2); + const IR::U32 offset = + smrd.imm ? ir.Imm32(smrd.offset * 4) + : IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), + ir.Imm32(2))}; + const IR::U64 base = + ir.PackUint2x32(ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1))); + const IR::U64 address = ir.IAdd(base, offset); + const IR::ScalarReg dst_reg{inst.dst[0].code}; + Load(ir, num_dwords, {}, dst_reg, address); +} + +void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { + const auto& smrd = inst.control.smrd; + const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2); + const IR::U32 offset = + smrd.imm ? 
ir.Imm32(smrd.offset * 4) + : IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), + ir.Imm32(2))}; + const IR::Value vsharp = + ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1), + ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3)); + const IR::ScalarReg dst_reg{inst.dst[0].code}; + Load(ir, num_dwords, vsharp, dst_reg, offset); +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp new file mode 100644 index 00000000..002351ca --- /dev/null +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -0,0 +1,152 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Gcn { + +Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} { + IR::VectorReg dst_vreg = IR::VectorReg::V0; + switch (stage) { + case Stage::Vertex: + // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23 + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; + case Stage::Fragment: + // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L258 + // The first two VGPRs are used for the i/j barycentric coordinates. In the vast majority + // of cases it will be only those two, but if the shader uses e.g. both linear and + // perspective inputs there can be more. For now, assume that this isn't the case. + dst_vreg = IR::VectorReg::V2; + for (u32 i = 0; i < 4; i++) { + ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, i)); + } + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace)); + break; + default: + throw NotImplementedException("Unknown shader stage"); + } + + // Initialize user data.
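The sixteen user-data SGPRs are where the guest driver normally passes resource pointers and constants into a shader; this bring-up commit simply zero-fills them before translation starts, which is enough for the first triangle. A minimal sketch of what seeding them from real guest values could look like, using only the IREmitter helpers already shown (SeedUserData itself is hypothetical, not part of this patch):

#include <span>

#include "shader_recompiler/ir/ir_emitter.h"

namespace Shader::Gcn {

// Hypothetical helper: copy the user-data words the guest driver wrote into
// the leading SGPRs, instead of zero-filling them as the code below does.
void SeedUserData(IR::IREmitter& ir, std::span<const u32> user_data) {
    IR::ScalarReg sreg = IR::ScalarReg::S0;
    for (const u32 value : user_data) {
        ir.SetScalarReg(sreg++, ir.Imm32(value));
    }
}

} // namespace Shader::Gcn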
+ IR::ScalarReg dst_sreg = IR::ScalarReg::S0; + for (u32 i = 0; i < 16; i++) { + ir.SetScalarReg(dst_sreg++, ir.Imm32(0U)); + } +} + +IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { + switch (operand.field) { + case OperandField::ScalarGPR: + if (operand.type == ScalarType::Float32 || force_flt) { + return ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code)); + } else { + return ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code)); + } + case OperandField::VectorGPR: + if (operand.type == ScalarType::Float32 || force_flt) { + return ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code)); + } else { + return ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code)); + } + case OperandField::ConstZero: + if (force_flt) { + return ir.Imm32(0.f); + } else { + return ir.Imm32(0U); + } + case OperandField::SignedConstIntPos: + ASSERT(!force_flt); + return ir.Imm32(operand.code - SignedConstIntPosMin + 1); + case OperandField::SignedConstIntNeg: + ASSERT(!force_flt); + return ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1); + case OperandField::LiteralConst: + ASSERT(!force_flt); + return ir.Imm32(operand.code); + case OperandField::ConstFloatPos_1_0: + return ir.Imm32(1.f); + case OperandField::ConstFloatPos_0_5: + return ir.Imm32(0.5f); + case OperandField::ConstFloatNeg_0_5: + return ir.Imm32(-0.5f); + default: + UNREACHABLE(); + } +} + +void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { + switch (operand.field) { + case OperandField::ScalarGPR: + return ir.SetScalarReg(IR::ScalarReg(operand.code), value); + case OperandField::VectorGPR: + return ir.SetVectorReg(IR::VectorReg(operand.code), value); + case OperandField::VccHi: + case OperandField::M0: + break; // Ignore for now + default: + UNREACHABLE(); + } +} + +void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list) { + if (inst_list.empty()) { + return; + } + Translator translator{block, stage}; + for (const auto& inst : inst_list) { + switch (inst.opcode) { + case Opcode::S_MOV_B32: + translator.S_MOV(inst); + break; + case Opcode::S_MUL_I32: + translator.S_MUL_I32(inst); + break; + case Opcode::V_MOV_B32: + translator.V_MOV(inst); + break; + case Opcode::V_MAC_F32: + translator.V_MAC_F32(inst); + break; + case Opcode::V_MUL_F32: + translator.V_MUL_F32(inst); + break; + case Opcode::S_SWAPPC_B64: + case Opcode::S_WAITCNT: + break; // Ignore for now.
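Only the opcodes a simple vertex/pixel shader pair needs are translated at this point in the dispatch: S_SWAPPC_B64 and S_WAITCNT are deliberately ignored since they encode subroutine-call and hardware wait-state mechanics with no equivalent in the host IR, while anything unrecognized stops translation at the UNREACHABLE_MSG in the default case further down. A hedged sketch of how a caller could contain such failures per shader instead of taking down the whole emulator (TryTranslate is illustrative, not part of the patch; it assumes the recompiler's exceptions derive from std::exception, as those in exception.h do):

#include <cstdio>
#include <exception>

// Hypothetical wrapper: run one translation step, report a failure, and let
// the caller fall back (e.g. skip the draw) instead of unwinding further.
template <typename Fn>
bool TryTranslate(Fn&& translate_step) noexcept {
    try {
        translate_step();
        return true;
    } catch (const std::exception& e) {
        std::fprintf(stderr, "Shader translation failed: %s\n", e.what());
        return false;
    }
}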
+ case Opcode::S_BUFFER_LOAD_DWORDX16: + translator.S_BUFFER_LOAD_DWORD(16, inst); + break; + case Opcode::EXP: + translator.EXP(inst); + break; + case Opcode::V_INTERP_P2_F32: + translator.V_INTERP_P2_F32(inst); + break; + case Opcode::V_CVT_PKRTZ_F16_F32: + translator.V_CVT_PKRTZ_F16_F32(inst); + break; + case Opcode::IMAGE_SAMPLE: + translator.IMAGE_SAMPLE(inst); + break; + case Opcode::V_CMP_EQ_U32: + translator.V_CMP_EQ_U32(inst); + break; + case Opcode::V_CNDMASK_B32: + translator.V_CNDMASK_B32(inst); + break; + case Opcode::S_MOV_B64: + case Opcode::S_WQM_B64: + case Opcode::V_INTERP_P1_F32: + case Opcode::S_ENDPGM: + break; + default: + UNREACHABLE_MSG("Unknown opcode {}", u32(inst.opcode)); + } + } +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h new file mode 100644 index 00000000..2f972bef --- /dev/null +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/ir_emitter.h" + +namespace Shader { +enum class Stage : u32; +} + +namespace Shader::Gcn { + +enum class ConditionOp : u32 { + EQ, + LG, + GT, + GE, + LT, + LE, +}; + +class Translator { +public: + explicit Translator(IR::Block* block_, Stage stage); + + // Scalar ALU + void S_MOV(const GcnInst& inst); + void S_MUL_I32(const GcnInst& inst); + void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst); + + // Scalar Memory + void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); + void S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst); + + // Vector ALU + void V_MOV(const GcnInst& inst); + void V_SAD(const GcnInst& inst); + void V_MAC_F32(const GcnInst& inst); + void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); + void V_MUL_F32(const GcnInst& inst); + void V_CMP_EQ_U32(const GcnInst& inst); + void V_CNDMASK_B32(const GcnInst& inst); + + // Vector interpolation + void V_INTERP_P2_F32(const GcnInst& inst); + + // Data share + void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); + void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); + + // MIMG + void IMAGE_GET_RESINFO(const GcnInst& inst); + void IMAGE_SAMPLE(const GcnInst& inst); + + // Export + void EXP(const GcnInst& inst); + +private: + IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false); + void SetDst(const InstOperand& operand, const IR::U32F32& value); + +private: + IR::Block* block; + IR::IREmitter ir; +}; + +void Translate(IR::Block* block, Stage stage, std::span inst_list); + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp new file mode 100644 index 00000000..cbb3268c --- /dev/null +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#pragma clang optimize off +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Translator::V_MOV(const GcnInst& inst) { + SetDst(inst.dst[0], GetSrc(inst.src[0])); +} + +void Translator::V_SAD(const GcnInst& inst) { + const IR::U32 abs_diff = ir.IAbs(ir.ISub(GetSrc(inst.src[0]), 
GetSrc(inst.src[1]))); + SetDst(inst.dst[0], ir.IAdd(abs_diff, GetSrc(inst.src[2]))); +} + +void Translator::V_MAC_F32(const GcnInst& inst) { + SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), GetSrc(inst.dst[0]))); +} + +void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { + const IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])), + ir.FPConvert(16, GetSrc(inst.src[1]))); + ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32)); +} + +void Translator::V_MUL_F32(const GcnInst& inst) { + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); +} + +void Translator::V_CMP_EQ_U32(const GcnInst& inst) { + const IR::U1 result = ir.IEqual(GetSrc(inst.src[0]), GetSrc(inst.src[1])); + if (inst.dst[1].field == OperandField::VccLo) { + return ir.SetVcc(result); + } else if (inst.dst[1].field == OperandField::ScalarGPR) { + const IR::ScalarReg dst_reg{inst.dst[1].code}; + return ir.SetScalarReg(dst_reg, IR::U32{ir.Select(result, ir.Imm32(1U), ir.Imm32(0U))}); + } + UNREACHABLE(); +} + +void Translator::V_CNDMASK_B32(const GcnInst& inst) { + const IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::ScalarReg flag_reg{inst.src[2].code}; + const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR + ? ir.INotEqual(ir.GetScalarReg(flag_reg), ir.Imm32(0U)) + : ir.GetVcc(); + + // We can treat the instruction as integer most of the time, but when a source is + // a floating point constant we will force the other as float for better readability. + // The other operand is also highly likely to be float as well. + const auto is_float_const = [](OperandField field) { + return field >= OperandField::ConstFloatPos_0_5 && field <= OperandField::ConstFloatNeg_4_0; + }; + const bool has_flt_source = + is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field); + const IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source); + const IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source); + const IR::Value result = ir.Select(flag, src1, src0); + ir.SetVectorReg(dst_reg, IR::U32F32{result}); +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp new file mode 100644 index 00000000..47c98cd7 --- /dev/null +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Translator::V_INTERP_P2_F32(const GcnInst& inst) { + const IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr}; + ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp new file mode 100644 index 00000000..ae82e3cc --- /dev/null +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -0,0 +1,103 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Translator::IMAGE_GET_RESINFO(const GcnInst&
inst) { + IR::VectorReg dst_reg{inst.src[1].code}; + const IR::ScalarReg tsharp_reg{inst.src[2].code}; + const auto flags = ImageResFlags(inst.control.mimg.dmask); + const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code)); + const IR::Value tsharp = + ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(tsharp_reg + 1), + ir.GetScalarReg(tsharp_reg + 2), ir.GetScalarReg(tsharp_reg + 3)); + const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(false)); + + if (flags.test(ImageResComponent::Width)) { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 0)}); + } + if (flags.test(ImageResComponent::Height)) { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 1)}); + } + if (flags.test(ImageResComponent::Depth)) { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 2)}); + } + if (flags.test(ImageResComponent::MipCount)) { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 3)}); + } +} + +void Translator::IMAGE_SAMPLE(const GcnInst& inst) { + const auto& mimg = inst.control.mimg; + ASSERT(!mimg.da); + + IR::VectorReg addr_reg{inst.src[0].code}; + IR::VectorReg dest_reg{inst.dst[0].code}; + const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; + const IR::ScalarReg sampler_reg{inst.src[3].code * 4}; + const auto flags = MimgModifierFlags(mimg.mod); + + // Load first dword of T# and S#. We will use them as the handle that will guide resource + // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture + // binding index. + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg)); + + // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction + // Set Architecture + const IR::Value offset = + flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{}; + const IR::F32 bias = + flags.test(MimgModifier::LodBias) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; + const IR::F32 dref = + flags.test(MimgModifier::Pcf) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; + + // Derivatives are tricky because their number depends on the texture type which is located in + // T#. We don't have access to T# though until resource tracking pass. For now assume no + // derivatives are present, otherwise we don't know where coordinates are placed in the address + // stream. + ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction"); + + // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler + // Since these are at most 4 dwords, we load them into a single uvec4 and place them + // in coords field of the instruction. Then the resource tracking pass will patch the + // IR instruction to fill in lod_clamp field. The vector can also be used + // as coords directly as SPIR-V will ignore any extra parameters. + const IR::Value body = + ir.CompositeConstruct(ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++), + ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++)); + + // Issue IR instruction, leaving unknown fields blank to patch later. + const IR::Value texel = [&]() -> IR::Value { + const IR::F32 lod = flags.test(MimgModifier::Level0) ? 
ir.Imm32(0.f) : IR::F32{}; + const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod); + if (!flags.test(MimgModifier::Pcf)) { + if (explicit_lod) { + return ir.ImageSampleExplicitLod(handle, body, lod, offset, {}); + } else { + return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, {}); + } + } + if (explicit_lod) { + return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, {}); + } + return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, {}); + }(); + + for (u32 i = 0; i < 4; i++) { + if (((mimg.dmask >> i) & 1) == 0) { + continue; + } + IR::F32 value; + if (flags.test(MimgModifier::Pcf)) { + value = i < 3 ? IR::F32{texel} : ir.Imm32(1.0f); + } else { + value = IR::F32{ir.CompositeExtract(texel, i)}; + } + ir.SetVectorReg(dest_reg++, value); + } +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/abstract_syntax_list.h b/src/shader_recompiler/ir/abstract_syntax_list.h new file mode 100644 index 00000000..313a23ab --- /dev/null +++ b/src/shader_recompiler/ir/abstract_syntax_list.h @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { + +class Block; + +struct AbstractSyntaxNode { + enum class Type { + Block, + If, + EndIf, + Loop, + Repeat, + Break, + Return, + Unreachable, + }; + union Data { + Block* block; + struct { + U1 cond; + Block* body; + Block* merge; + } if_node; + struct { + Block* merge; + } end_if; + struct { + Block* body; + Block* continue_block; + Block* merge; + } loop; + struct { + U1 cond; + Block* loop_header; + Block* merge; + } repeat; + struct { + U1 cond; + Block* merge; + Block* skip; + } break_node; + }; + + Data data{}; + Type type{}; +}; +using AbstractSyntaxList = std::vector; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp new file mode 100644 index 00000000..714053bc --- /dev/null +++ b/src/shader_recompiler/ir/attribute.cpp @@ -0,0 +1,115 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/ir/attribute.h" + +namespace Shader::IR { + +bool IsParam(Attribute attribute) noexcept { + return attribute >= Attribute::Param0 && attribute <= Attribute::Param31; +} + +std::string NameOf(Attribute attribute) { + switch (attribute) { + case Attribute::RenderTarget0: + return "RenderTarget0"; + case Attribute::RenderTarget1: + return "RenderTarget1"; + case Attribute::RenderTarget2: + return "RenderTarget2"; + case Attribute::RenderTarget3: + return "RenderTarget3"; + case Attribute::RenderTarget4: + return "RenderTarget4"; + case Attribute::RenderTarget5: + return "RenderTarget5"; + case Attribute::RenderTarget6: + return "RenderTarget6"; + case Attribute::RenderTarget7: + return "RenderTarget7"; + case Attribute::Depth: + return "Depth"; + case Attribute::Null: + return "Null"; + case Attribute::Position0: + return "Position0"; + case Attribute::Position1: + return "Position1"; + case Attribute::Position2: + return "Position2"; + case Attribute::Position3: + return "Position3"; + case Attribute::Param0: + return "Param0"; + case Attribute::Param1: + return "Param1"; + case Attribute::Param2: + return "Param2"; + case Attribute::Param3: + return "Param3"; + case Attribute::Param4: + return "Param4"; + case Attribute::Param5: + return 
"Param5"; + case Attribute::Param6: + return "Param6"; + case Attribute::Param7: + return "Param7"; + case Attribute::Param8: + return "Param8"; + case Attribute::Param9: + return "Param9"; + case Attribute::Param10: + return "Param10"; + case Attribute::Param11: + return "Param11"; + case Attribute::Param12: + return "Param12"; + case Attribute::Param13: + return "Param13"; + case Attribute::Param14: + return "Param14"; + case Attribute::Param15: + return "Param15"; + case Attribute::Param16: + return "Param16"; + case Attribute::Param17: + return "Param17"; + case Attribute::Param18: + return "Param18"; + case Attribute::Param19: + return "Param19"; + case Attribute::Param20: + return "Param20"; + case Attribute::Param21: + return "Param21"; + case Attribute::Param22: + return "Param22"; + case Attribute::Param23: + return "Param23"; + case Attribute::Param24: + return "Param24"; + case Attribute::Param25: + return "Param25"; + case Attribute::Param26: + return "Param26"; + case Attribute::Param27: + return "Param27"; + case Attribute::Param28: + return "Param28"; + case Attribute::Param29: + return "Param29"; + case Attribute::Param30: + return "Param30"; + case Attribute::Param31: + return "Param31"; + case Attribute::VertexId: + return "VertexId"; + default: + break; + } + return fmt::format("", static_cast(attribute)); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h new file mode 100644 index 00000000..a4d76dbf --- /dev/null +++ b/src/shader_recompiler/ir/attribute.h @@ -0,0 +1,105 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class Attribute : u64 { + // Export targets + RenderTarget0 = 0, + RenderTarget1 = 1, + RenderTarget2 = 2, + RenderTarget3 = 3, + RenderTarget4 = 4, + RenderTarget5 = 5, + RenderTarget6 = 6, + RenderTarget7 = 7, + Depth = 8, + Null = 9, + Position0 = 12, + Position1 = 13, + Position2 = 14, + Position3 = 15, + Param0 = 32, + Param1 = 33, + Param2 = 34, + Param3 = 35, + Param4 = 36, + Param5 = 37, + Param6 = 38, + Param7 = 39, + Param8 = 40, + Param9 = 41, + Param10 = 42, + Param11 = 43, + Param12 = 44, + Param13 = 45, + Param14 = 46, + Param15 = 47, + Param16 = 48, + Param17 = 49, + Param18 = 50, + Param19 = 51, + Param20 = 52, + Param21 = 53, + Param22 = 54, + Param23 = 55, + Param24 = 56, + Param25 = 57, + Param26 = 58, + Param27 = 59, + Param28 = 60, + Param29 = 61, + Param30 = 62, + Param31 = 63, + // System values + ClipDistance = 64, + CullDistance = 65, + RenderTargetId = 66, + ViewportId = 67, + VertexId = 68, + PrimitiveId = 69, + InstanceId = 70, + IsFrontFace = 71, + SampleIndex = 72, + GlobalInvocationId = 73, + WorkgroupId = 74, + LocalInvocationId = 75, + LocalInvocationIndex = 76, + FragCoord = 77, +}; + +constexpr size_t EXP_NUM_POS = 4; +constexpr size_t EXP_NUM_PARAM = 32; + +[[nodiscard]] bool IsParam(Attribute attribute) noexcept; + +[[nodiscard]] std::string NameOf(Attribute attribute); + +[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) { + const int result{static_cast(attr) + num}; + if (result > static_cast(Attribute::Param31)) { + throw LogicError("Overflow on register arithmetic"); + } + if (result < static_cast(Attribute::Param0)) { + throw LogicError("Underflow on register arithmetic"); + } + return static_cast(result); +} + +} // namespace 
Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Attribute> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::IR::Attribute& attribute, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); + } +}; diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp new file mode 100644 index 00000000..39174c56 --- /dev/null +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -0,0 +1,149 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <algorithm> +#include <initializer_list> +#include <map> +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { + +Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {} + +Block::~Block() = default; + +void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { + PrependNewInst(end(), op, args); +} + +Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) { + Inst* const inst{inst_pool->Create(base_inst)}; + return instructions.insert(insertion_point, *inst); +} + +Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args, u32 flags) { + Inst* const inst{inst_pool->Create(op, flags)}; + const auto result_it{instructions.insert(insertion_point, *inst)}; + + if (inst->NumArgs() != args.size()) { + throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); + } + std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { + inst->SetArg(index, arg); + ++index; + }); + return result_it; +} + +void Block::AddBranch(Block* block) { + if (std::ranges::find(imm_successors, block) != imm_successors.end()) { + throw LogicError("Successor already inserted"); + } + if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { + throw LogicError("Predecessor already inserted"); + } + imm_successors.push_back(block); + block->imm_predecessors.push_back(this); +} + +static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, + Block* block) { + if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); +} + +static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Inst* inst) { + const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; + if (is_inserted) { + ++inst_index; + } + return it->second; +} + +static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Value& arg) { + if (arg.IsEmpty()) { + return "<null>"; + } + if (!arg.IsImmediate() || arg.IsIdentity()) { + return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); + } + switch (arg.Type()) { + case Type::U1: + return fmt::format("#{}", arg.U1() ?
"true" : "false"); + case Type::U8: + return fmt::format("#{}", arg.U8()); + case Type::U16: + return fmt::format("#{}", arg.U16()); + case Type::U32: + return fmt::format("#{}", arg.U32()); + case Type::U64: + return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); + case Type::ScalarReg: + return fmt::format("{}", arg.ScalarReg()); + case Type::VectorReg: + return fmt::format("{}", arg.VectorReg()); + case Type::Attribute: + return fmt::format("{}", arg.Attribute()); + default: + return ""; + } +} + +std::string DumpBlock(const Block& block) { + size_t inst_index{0}; + std::map inst_to_index; + return DumpBlock(block, {}, inst_to_index, inst_index); +} + +std::string DumpBlock(const Block& block, const std::map& block_to_index, + std::map& inst_to_index, size_t& inst_index) { + std::string ret{"Block"}; + if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { + ret += fmt::format(" ${}", it->second); + } + ret += '\n'; + for (const Inst& inst : block) { + const Opcode op{inst.GetOpcode()}; + ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); + if (TypeOf(op) != Type::Void) { + ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); + } else { + ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces + } + const size_t arg_count{inst.NumArgs()}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; + ret += arg_index != 0 ? ", " : " "; + if (op == Opcode::Phi) { + ret += fmt::format("[ {}, {} ]", arg_str, + BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); + } else { + ret += arg_str; + } + if (op != Opcode::Phi) { + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); + } + } + } + if (TypeOf(op) != Type::Void) { + ret += fmt::format(" (uses: {})\n", inst.UseCount()); + } else { + ret += '\n'; + } + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h new file mode 100644 index 00000000..5cd36420 --- /dev/null +++ b/src/shader_recompiler/ir/basic_block.h @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include + +#include "common/types.h" +#include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/value.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +class Block { +public: + using InstructionList = boost::intrusive::list; + using size_type = InstructionList::size_type; + using iterator = InstructionList::iterator; + using const_iterator = InstructionList::const_iterator; + using reverse_iterator = InstructionList::reverse_iterator; + using const_reverse_iterator = InstructionList::const_reverse_iterator; + + explicit Block(ObjectPool& inst_pool_); + ~Block(); + + Block(const Block&) = delete; + Block& operator=(const Block&) = delete; + + Block(Block&&) = default; + Block& operator=(Block&&) = default; + + /// Appends a new instruction to the end of this basic block. + void AppendNewInst(Opcode op, std::initializer_list args); + + /// Prepends a copy of an instruction to this basic block before the insertion point. 
diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h new file mode 100644 index 00000000..5cd36420 --- /dev/null +++ b/src/shader_recompiler/ir/basic_block.h @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <initializer_list> +#include <map> +#include <span> +#include <vector> +#include <boost/intrusive/list.hpp> + +#include "common/types.h" +#include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/value.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +class Block { +public: + using InstructionList = boost::intrusive::list<Inst>; + using size_type = InstructionList::size_type; + using iterator = InstructionList::iterator; + using const_iterator = InstructionList::const_iterator; + using reverse_iterator = InstructionList::reverse_iterator; + using const_reverse_iterator = InstructionList::const_reverse_iterator; + + explicit Block(ObjectPool<Inst>& inst_pool_); + ~Block(); + + Block(const Block&) = delete; + Block& operator=(const Block&) = delete; + + Block(Block&&) = default; + Block& operator=(Block&&) = default; + + /// Appends a new instruction to the end of this basic block. + void AppendNewInst(Opcode op, std::initializer_list<Value> args); + + /// Prepends a copy of an instruction to this basic block before the insertion point. + iterator PrependNewInst(iterator insertion_point, const Inst& base_inst); + + /// Prepends a new instruction to this basic block before the insertion point. + iterator PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args = {}, u32 flags = 0); + + /// Adds a new branch to this basic block. + void AddBranch(Block* block); + + /// Gets a mutable reference to the instruction list for this basic block. + [[nodiscard]] InstructionList& Instructions() noexcept { + return instructions; + } + /// Gets an immutable reference to the instruction list for this basic block. + [[nodiscard]] const InstructionList& Instructions() const noexcept { + return instructions; + } + + /// Gets an immutable span to the immediate predecessors. + [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept { + return imm_predecessors; + } + /// Gets an immutable span to the immediate successors. + [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept { + return imm_successors; + } + + /// Intrusively store the host definition of this block. + template <typename T> + void SetDefinition(T def) { + definition = std::bit_cast<u32>(def); + } + + /// Return the intrusively stored host definition of this block. + template <typename T> + [[nodiscard]] T Definition() const noexcept { + return std::bit_cast<T>(definition); + } + + void SsaSeal() noexcept { + is_ssa_sealed = true; + } + [[nodiscard]] bool IsSsaSealed() const noexcept { + return is_ssa_sealed; + } + + [[nodiscard]] bool empty() const { + return instructions.empty(); + } + [[nodiscard]] size_type size() const { + return instructions.size(); + } + + [[nodiscard]] Inst& front() { + return instructions.front(); + } + [[nodiscard]] const Inst& front() const { + return instructions.front(); + } + + [[nodiscard]] Inst& back() { + return instructions.back(); + } + [[nodiscard]] const Inst& back() const { + return instructions.back(); + } + + [[nodiscard]] iterator begin() { + return instructions.begin(); + } + [[nodiscard]] const_iterator begin() const { + return instructions.begin(); + } + [[nodiscard]] iterator end() { + return instructions.end(); + } + [[nodiscard]] const_iterator end() const { + return instructions.end(); + } + + [[nodiscard]] reverse_iterator rbegin() { + return instructions.rbegin(); + } + [[nodiscard]] const_reverse_iterator rbegin() const { + return instructions.rbegin(); + } + [[nodiscard]] reverse_iterator rend() { + return instructions.rend(); + } + [[nodiscard]] const_reverse_iterator rend() const { + return instructions.rend(); + } + + [[nodiscard]] const_iterator cbegin() const { + return instructions.cbegin(); + } + [[nodiscard]] const_iterator cend() const { + return instructions.cend(); + } + + [[nodiscard]] const_reverse_iterator crbegin() const { + return instructions.crbegin(); + } + [[nodiscard]] const_reverse_iterator crend() const { + return instructions.crend(); + } + + /// Intrusively store the value of a register in the block. + std::array<Value, NumScalarRegs> ssa_sreg_values; + std::array<Value, NumVectorRegs> ssa_vreg_values; + +private: + /// Memory pool for instruction list + ObjectPool<Inst>* inst_pool; + + /// List of instructions in this block + InstructionList instructions; + + /// Block immediate predecessors + std::vector<Block*> imm_predecessors; + /// Block immediate successors + std::vector<Block*> imm_successors; + + /// Intrusively store if the block is sealed in the SSA pass. + bool is_ssa_sealed{false}; + + /// Intrusively stored host definition of this block.
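// A hypothetical usage sketch (not part of the patch): a backend can stash
// its own 32-bit handle in this slot instead of keeping a side map, e.g. a
// SPIR-V emitter tagging each block with its label id:
//
//     void TagBlock(Shader::IR::Block& block, u32 spirv_label_id) {
//         block.SetDefinition(spirv_label_id);       // std::bit_cast into u32
//         const u32 label = block.Definition<u32>(); // read back by the backend
//     }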
+ u32 definition{}; +}; + +using BlockList = std::vector; + +[[nodiscard]] std::string DumpBlock(const Block& block); + +[[nodiscard]] std::string DumpBlock(const Block& block, + const std::map& block_to_index, + std::map& inst_to_index, + size_t& inst_index); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/condition.h b/src/shader_recompiler/ir/condition.h new file mode 100644 index 00000000..4b60be67 --- /dev/null +++ b/src/shader_recompiler/ir/condition.h @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::IR { + +enum class Condition : u32 { + False, + True, + Scc0, + Scc1, + Vccz, + Vccnz, + Execz, + Execnz, +}; + +constexpr std::string_view NameOf(Condition condition) { + switch (condition) { + case Condition::False: + return "False"; + case Condition::True: + return "True"; + case Condition::Scc0: + return "Scc0"; + case Condition::Scc1: + return "Scc1"; + case Condition::Vccz: + return "Vccz"; + case Condition::Vccnz: + return "Vccnz"; + case Condition::Execz: + return "Execz"; + case Condition::Execnz: + return "Execnz"; + } +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter : formatter { + auto format(const Shader::IR::Condition& cond, format_context& ctx) const { + return formatter::format(NameOf(cond), ctx); + } +}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp new file mode 100644 index 00000000..aae23ef6 --- /dev/null +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -0,0 +1,1129 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/exception.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { +namespace { +[[noreturn]] void ThrowInvalidType(Type type) { + throw InvalidArgument("Invalid type {}", u32(type)); +} + +Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { + if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { + return ir.CompositeConstruct(bias_lod, lod_clamp); + } else if (!bias_lod.IsEmpty()) { + return bias_lod; + } else if (!lod_clamp.IsEmpty()) { + return lod_clamp; + } else { + return Value{}; + } +} +} // Anonymous namespace + +U1 IREmitter::Imm1(bool value) const { + return U1{Value{value}}; +} + +U8 IREmitter::Imm8(u8 value) const { + return U8{Value{value}}; +} + +U16 IREmitter::Imm16(u16 value) const { + return U16{Value{value}}; +} + +U32 IREmitter::Imm32(u32 value) const { + return U32{Value{value}}; +} + +U32 IREmitter::Imm32(s32 value) const { + return U32{Value{static_cast(value)}}; +} + +F32 IREmitter::Imm32(f32 value) const { + return F32{Value{value}}; +} + +U64 IREmitter::Imm64(u64 value) const { + return U64{Value{value}}; +} + +U64 IREmitter::Imm64(s64 value) const { + return U64{Value{static_cast(value)}}; +} + +F64 IREmitter::Imm64(f64 value) const { + return F64{Value{value}}; +} + +template <> +IR::U32 IREmitter::BitCast(const IR::F32& value) { + return Inst(Opcode::BitCastU32F32, value); +} + +template <> +IR::F32 IREmitter::BitCast(const IR::U32& value) { + return Inst(Opcode::BitCastF32U32, value); +} + +template <> +IR::U16 IREmitter::BitCast(const IR::F16& value) { + return Inst(Opcode::BitCastU16F16, value); +} + +template <> +IR::F16 IREmitter::BitCast(const IR::U16& value) { + return 
Inst(Opcode::BitCastF16U16, value); +} + +template <> +IR::U64 IREmitter::BitCast(const IR::F64& value) { + return Inst(Opcode::BitCastU64F64, value); +} + +template <> +IR::F64 IREmitter::BitCast(const IR::U64& value) { + return Inst(Opcode::BitCastF64U64, value); +} + +U1 IREmitter::ConditionRef(const U1& value) { + return Inst(Opcode::ConditionRef, value); +} + +void IREmitter::Reference(const Value& value) { + Inst(Opcode::Reference, value); +} + +void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { + Inst(Opcode::PhiMove, Value{&phi}, value); +} + +void IREmitter::Prologue() { + Inst(Opcode::Prologue); +} + +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); +} + +template <> +U32 IREmitter::GetScalarReg(IR::ScalarReg reg) { + return Inst(Opcode::GetScalarRegister, reg); +} + +template <> +F32 IREmitter::GetScalarReg(IR::ScalarReg reg) { + return BitCast(GetScalarReg(reg)); +} + +template <> +U32 IREmitter::GetVectorReg(IR::VectorReg reg) { + return Inst(Opcode::GetVectorRegister, reg); +} + +template <> +F32 IREmitter::GetVectorReg(IR::VectorReg reg) { + return BitCast(GetVectorReg(reg)); +} + +void IREmitter::SetScalarReg(IR::ScalarReg reg, const U32F32& value) { + const U32 value_typed = value.Type() == Type::F32 ? BitCast(F32{value}) : U32{value}; + Inst(Opcode::SetScalarRegister, reg, value_typed); +} + +void IREmitter::SetVectorReg(IR::VectorReg reg, const U32F32& value) { + const U32 value_typed = value.Type() == Type::F32 ? BitCast(F32{value}) : U32{value}; + Inst(Opcode::SetVectorRegister, reg, value_typed); +} + +U1 IREmitter::GetGotoVariable(u32 id) { + return Inst(Opcode::GetGotoVariable, id); +} + +U1 IREmitter::Condition(IR::Condition cond) { + switch (cond) { + case IR::Condition::False: + return Imm1(false); + case IR::Condition::True: + return Imm1(true); + case IR::Condition::Scc0: + case IR::Condition::Scc1: + case IR::Condition::Vccz: + case IR::Condition::Vccnz: + case IR::Condition::Execz: + case IR::Condition::Execnz: + default: + throw NotImplementedException(""); + } +} + +void IREmitter::SetGotoVariable(u32 id, const U1& value) { + Inst(Opcode::SetGotoVariable, id, value); +} + +U1 IREmitter::GetVcc() { + return Inst(Opcode::GetVcc); +} + +void IREmitter::SetVcc(const U1& value) { + Inst(Opcode::SetVcc, value); +} + +F32 IREmitter::GetAttribute(IR::Attribute attribute) { + return GetAttribute(attribute, 0); +} + +F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { + return Inst(Opcode::GetAttribute, attribute, Imm32(comp)); +} + +U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { + return GetAttributeU32(attribute, 0); +} + +U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { + return Inst(Opcode::GetAttributeU32, attribute, Imm32(comp)); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp) { + Inst(Opcode::SetAttribute, attribute, value, Imm32(comp)); +} + +U32U64 IREmitter::ReadShared(int bit_size, bool is_signed, const U32& offset) { + /*switch (bit_size) { + case 8: + return Inst(is_signed ? Opcode::ReadSharedS8 : Opcode::ReadSharedU8, offset); + case 16: + return Inst(is_signed ? 
Opcode::ReadSharedS16 : Opcode::ReadSharedU16, offset); + case 32: + return Inst(Opcode::ReadSharedU32, offset); + case 64: + return Inst(Opcode::ReadSharedU64, offset); + } + throw InvalidArgument("Invalid bit size {}", bit_size);*/ +} + +void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { + /*switch (bit_size) { + case 8: + Inst(Opcode::WriteSharedU8, offset, value); + break; + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; + case 32: + Inst(Opcode::WriteSharedU32, offset, value); + break; + case 64: + Inst(Opcode::WriteSharedU64, offset, value); + break; + default: + throw InvalidArgument("Invalid bit size {}", bit_size); + }*/ +} + +U32 IREmitter::ReadConst(const U64& address, const U32& offset) { + return Inst(Opcode::ReadConst, address, offset); +} + +template <> +U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) { + return Inst(Opcode::ReadConstBuffer, handle, index, offset); +} + +template <> +F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) { + return Inst(Opcode::ReadConstBufferF32, handle, index, offset); +} + +F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F32: + return Inst(Opcode::FPAdd32, a, b); + case Type::F64: + return Inst(Opcode::FPAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { + if (e1.Type() != e2.Type()) { + throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x2, e1, e2); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x2, e1, e2); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x2, e1, e2); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x2, e1, e2); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { + throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), + e3.Type(), e4.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeExtract(const Value& vector, size_t element) { + const auto read{[&](Opcode opcode, size_t limit) -> Value { 
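+        // The component index must be statically known: it is range-checked
+        // against the vector width and encoded as an immediate U32 operand.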
+ if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, Value{static_cast(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return read(Opcode::CompositeExtractU32x2, 2); + case Type::U32x3: + return read(Opcode::CompositeExtractU32x3, 3); + case Type::U32x4: + return read(Opcode::CompositeExtractU32x4, 4); + case Type::F16x2: + return read(Opcode::CompositeExtractF16x2, 2); + case Type::F16x3: + return read(Opcode::CompositeExtractF16x3, 3); + case Type::F16x4: + return read(Opcode::CompositeExtractF16x4, 4); + case Type::F32x2: + return read(Opcode::CompositeExtractF32x2, 2); + case Type::F32x3: + return read(Opcode::CompositeExtractF32x3, 3); + case Type::F32x4: + return read(Opcode::CompositeExtractF32x4, 4); + case Type::F64x2: + return read(Opcode::CompositeExtractF64x2, 2); + case Type::F64x3: + return read(Opcode::CompositeExtractF64x3, 3); + case Type::F64x4: + return read(Opcode::CompositeExtractF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + +Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { + const auto insert{[&](Opcode opcode, size_t limit) { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, object, Value{static_cast(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return insert(Opcode::CompositeInsertU32x2, 2); + case Type::U32x3: + return insert(Opcode::CompositeInsertU32x3, 3); + case Type::U32x4: + return insert(Opcode::CompositeInsertU32x4, 4); + case Type::F16x2: + return insert(Opcode::CompositeInsertF16x2, 2); + case Type::F16x3: + return insert(Opcode::CompositeInsertF16x3, 3); + case Type::F16x4: + return insert(Opcode::CompositeInsertF16x4, 4); + case Type::F32x2: + return insert(Opcode::CompositeInsertF32x2, 2); + case Type::F32x3: + return insert(Opcode::CompositeInsertF32x3, 3); + case Type::F32x4: + return insert(Opcode::CompositeInsertF32x4, 4); + case Type::F64x2: + return insert(Opcode::CompositeInsertF64x2, 2); + case Type::F64x3: + return insert(Opcode::CompositeInsertF64x3, 3); + case Type::F64x4: + return insert(Opcode::CompositeInsertF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + +Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { + if (true_value.Type() != false_value.Type()) { + throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); + } + switch (true_value.Type()) { + case Type::U1: + return Inst(Opcode::SelectU1, condition, true_value, false_value); + case Type::U8: + return Inst(Opcode::SelectU8, condition, true_value, false_value); + case Type::U16: + return Inst(Opcode::SelectU16, condition, true_value, false_value); + case Type::U32: + return Inst(Opcode::SelectU32, condition, true_value, false_value); + case Type::U64: + return Inst(Opcode::SelectU64, condition, true_value, false_value); + case Type::F32: + return Inst(Opcode::SelectF32, condition, true_value, false_value); + case Type::F64: + return Inst(Opcode::SelectF64, condition, true_value, false_value); + default: + throw InvalidArgument("Invalid type {}", true_value.Type()); + } +} + +U64 IREmitter::PackUint2x32(const Value& vector) { + return Inst(Opcode::PackUint2x32, vector); +} + +Value IREmitter::UnpackUint2x32(const U64& value) { + return Inst(Opcode::UnpackUint2x32, value); +} + +U32 IREmitter::PackFloat2x16(const Value& vector) { + return 
Inst(Opcode::PackFloat2x16, vector); +} + +Value IREmitter::UnpackFloat2x16(const U32& value) { + return Inst(Opcode::UnpackFloat2x16, value); +} + +U32 IREmitter::PackHalf2x16(const Value& vector) { + return Inst(Opcode::PackHalf2x16, vector); +} + +Value IREmitter::UnpackHalf2x16(const U32& value) { + return Inst(Opcode::UnpackHalf2x16, value); +} + +F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F32: + return Inst(Opcode::FPMul32, a, b); + case Type::F64: + return Inst(Opcode::FPMul64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) { + if (a.Type() != b.Type() || a.Type() != c.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); + } + switch (a.Type()) { + case Type::F32: + return Inst(Opcode::FPFma32, a, b, c); + case Type::F64: + return Inst(Opcode::FPFma64, a, b, c); + default: + ThrowInvalidType(a.Type()); + } +} + +F32F64 IREmitter::FPAbs(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPAbs32, value); + case Type::F64: + return Inst(Opcode::FPAbs64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPNeg(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPNeg32, value); + case Type::F64: + return Inst(Opcode::FPNeg64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPAbsNeg(const F32F64& value, bool abs, bool neg) { + F32F64 result{value}; + if (abs) { + result = FPAbs(result); + } + if (neg) { + result = FPNeg(result); + } + return result; +} + +F32 IREmitter::FPCos(const F32& value) { + return Inst(Opcode::FPCos, value); +} + +F32 IREmitter::FPSin(const F32& value) { + return Inst(Opcode::FPSin, value); +} + +F32 IREmitter::FPExp2(const F32& value) { + return Inst(Opcode::FPExp2, value); +} + +F32 IREmitter::FPLog2(const F32& value) { + return Inst(Opcode::FPLog2, value); +} + +F32F64 IREmitter::FPRecip(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPRecip32, value); + case Type::F64: + return Inst(Opcode::FPRecip64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPRecipSqrt32, value); + case Type::F64: + return Inst(Opcode::FPRecipSqrt64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32 IREmitter::FPSqrt(const F32& value) { + return Inst(Opcode::FPSqrt, value); +} + +F32F64 IREmitter::FPSaturate(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPSaturate32, value); + case Type::F64: + return Inst(Opcode::FPSaturate64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPClamp(const F32F64& value, const F32F64& min_value, const F32F64& max_value) { + if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), + max_value.Type()); + } + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPClamp32, value, min_value, max_value); + case Type::F64: + return Inst(Opcode::FPClamp64, value, min_value, max_value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 
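+// The helpers below (round-to-nearest-even, floor, ceil, trunc) use the same
+// width dispatch as FPAdd/FPMul above: switch on the operand type and emit the
+// 32- or 64-bit opcode. Note that FPRoundEven rounds ties to even, so a
+// constant 2.5f input yields 2.0f once the backend lowers it.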
IREmitter::FPRoundEven(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPRoundEven32, value); + case Type::F64: + return Inst(Opcode::FPRoundEven64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPFloor(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPFloor32, value); + case Type::F64: + return Inst(Opcode::FPFloor64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPCeil(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPCeil32, value); + case Type::F64: + return Inst(Opcode::FPCeil64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPTrunc(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPTrunc32, value); + case Type::F64: + return Inst(Opcode::FPTrunc64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs, + rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs, + rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThanEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? 
Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThanEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual32 + : Opcode::FPUnordGreaterThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual64 + : Opcode::FPUnordGreaterThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPIsNan(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::FPIsNan32, value); + case Type::F64: + return Inst(Opcode::FPIsNan64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); +} + +U1 IREmitter::FPUnordered(const F32F64& lhs, const F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); +} + +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMax32, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMax64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMin32, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMin64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::IAdd32, a, b); + case Type::U64: + return Inst(Opcode::IAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::ISub32, a, b); + case Type::U64: + return Inst(Opcode::ISub64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32 IREmitter::IMul(const U32& a, const U32& b) { + return Inst(Opcode::IMul32, a, b); +} + +U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) { + return Inst(is_signed ? 
Opcode::SDiv32 : Opcode::UDiv32, a, b); +} + +U32U64 IREmitter::INeg(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::INeg32, value); + case Type::U64: + return Inst(Opcode::INeg64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32 IREmitter::IAbs(const U32& value) { + return Inst(Opcode::IAbs32, value); +} + +U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst(Opcode::ShiftLeftLogical32, base, shift); + case Type::U64: + return Inst(Opcode::ShiftLeftLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst(Opcode::ShiftRightLogical32, base, shift); + case Type::U64: + return Inst(Opcode::ShiftRightLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst(Opcode::ShiftRightArithmetic32, base, shift); + case Type::U64: + return Inst(Opcode::ShiftRightArithmetic64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseAnd32, a, b); +} + +U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseOr32, a, b); +} + +U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseXor32, a, b); +} + +U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count) { + return Inst(Opcode::BitFieldInsert, base, insert, offset, count); +} + +U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed) { + return Inst(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, + count); +} + +U32 IREmitter::BitReverse(const U32& value) { + return Inst(Opcode::BitReverse32, value); +} + +U32 IREmitter::BitCount(const U32& value) { + return Inst(Opcode::BitCount32, value); +} + +U32 IREmitter::BitwiseNot(const U32& value) { + return Inst(Opcode::BitwiseNot32, value); +} + +U32 IREmitter::FindSMsb(const U32& value) { + return Inst(Opcode::FindSMsb32, value); +} + +U32 IREmitter::FindUMsb(const U32& value) { + return Inst(Opcode::FindUMsb32, value); +} + +U32 IREmitter::SMin(const U32& a, const U32& b) { + return Inst(Opcode::SMin32, a, b); +} + +U32 IREmitter::UMin(const U32& a, const U32& b) { + return Inst(Opcode::UMin32, a, b); +} + +U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMin(a, b) : UMin(a, b); +} + +U32 IREmitter::SMax(const U32& a, const U32& b) { + return Inst(Opcode::SMax32, a, b); +} + +U32 IREmitter::UMax(const U32& a, const U32& b) { + return Inst(Opcode::UMax32, a, b); +} + +U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMax(a, b) : UMax(a, b); +} + +U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) { + return Inst(Opcode::SClamp32, value, min, max); +} + +U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { + return Inst(Opcode::UClamp32, value, min, max); +} + +U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? 
Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +} + +U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(Opcode::IEqual, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +} + +U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { + return Inst(Opcode::INotEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +} + +U1 IREmitter::LogicalOr(const U1& a, const U1& b) { + return Inst(Opcode::LogicalOr, a, b); +} + +U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { + return Inst(Opcode::LogicalAnd, a, b); +} + +U1 IREmitter::LogicalXor(const U1& a, const U1& b) { + return Inst(Opcode::LogicalXor, a, b); +} + +U1 IREmitter::LogicalNot(const U1& value) { + return Inst(Opcode::LogicalNot, value); +} + +U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) { + switch (bitsize) { + case 32: + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::ConvertS32F32, value); + case Type::F64: + return Inst(Opcode::ConvertS32F64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) { + switch (bitsize) { + case 32: + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::ConvertU32F32, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F32F64& value) { + return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); +} + +F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { + case 32: + switch (src_bitsize) { + case 32: + return Inst(Opcode::ConvertF32S32, value); + } + break; + case 64: + switch (src_bitsize) { + case 32: + return Inst(Opcode::ConvertF64S32, value); + } + break; + } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); +} + +F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { + case 32: + switch (src_bitsize) { + case 32: + return Inst(Opcode::ConvertF32U32, value); + } + break; + case 64: + switch (src_bitsize) { + case 32: + return Inst(Opcode::ConvertF64U32, value); + } + break; + } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); +} + +F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value) { + return is_signed ? 
ConvertSToF(dest_bitsize, src_bitsize, value) + : ConvertUToF(dest_bitsize, src_bitsize, value); +} + +U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + +F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { + switch (result_bitsize) { + case 16: + switch (value.Type()) { + case Type::F32: + return Inst(Opcode::ConvertF16F32, value); + } + break; + case 32: + switch (value.Type()) { + case Type::F16: + return Inst(Opcode::ConvertF32F16, value); + } + break; + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias_lc, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset); +} + +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias_lc, + offset); +} + +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& lod, const Value& offset, + TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod, + offset); +} + +Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info) { + return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset, offset2); +} + +Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, const F32& dref, TextureInstInfo info) { + return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, offset2, dref); +} + +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info) { + return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, offset, lod, multisampling); +} + +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips) { + return Inst(Opcode::ImageQueryDimensions, handle, lod, skip_mips); +} + +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips, TextureInstInfo info) { + return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips); +} + +Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { + return Inst(Opcode::ImageQueryLod, Flags{info}, handle, coords); +} + +Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives, + const Value& offset, const F32& lod_clamp, TextureInstInfo info) { + return Inst(Opcode::ImageGradient, Flags{info}, handle, coords, derivatives, offset, lod_clamp); +} + +Value IREmitter::ImageRead(const 
Value& handle, const Value& coords, TextureInstInfo info) { + return Inst(Opcode::ImageRead, Flags{info}, handle, coords); +} + +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info) { + Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h new file mode 100644 index 00000000..8c8f657e --- /dev/null +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -0,0 +1,250 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/condition.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { + +class IREmitter { +public: + explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} + explicit IREmitter(Block& block_, Block::iterator insertion_point_) + : block{&block_}, insertion_point{insertion_point_} {} + + Block* block; + + [[nodiscard]] U1 Imm1(bool value) const; + [[nodiscard]] U8 Imm8(u8 value) const; + [[nodiscard]] U16 Imm16(u16 value) const; + [[nodiscard]] U32 Imm32(u32 value) const; + [[nodiscard]] U32 Imm32(s32 value) const; + [[nodiscard]] F32 Imm32(f32 value) const; + [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(s64 value) const; + [[nodiscard]] F64 Imm64(f64 value) const; + + template + [[nodiscard]] Dest BitCast(const Source& value); + + U1 ConditionRef(const U1& value); + void Reference(const Value& value); + + void PhiMove(IR::Inst& phi, const Value& value); + + void Prologue(); + void Epilogue(); + + template + [[nodiscard]] T GetScalarReg(IR::ScalarReg reg); + template + [[nodiscard]] T GetVectorReg(IR::VectorReg reg); + void SetScalarReg(IR::ScalarReg reg, const U32F32& value); + void SetVectorReg(IR::VectorReg reg, const U32F32& value); + + [[nodiscard]] U1 GetGotoVariable(u32 id); + void SetGotoVariable(u32 id, const U1& value); + + [[nodiscard]] U1 GetVcc(); + + void SetVcc(const U1& value); + + [[nodiscard]] U1 Condition(IR::Condition cond); + + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp); + [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); + [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp); + void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp); + + [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset); + void WriteShared(int bit_size, const Value& value, const U32& offset); + + [[nodiscard]] U32 ReadConst(const U64& address, const U32& offset); + template + [[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset); + + [[nodiscard]] U1 GetZeroFromOp(const Value& op); + [[nodiscard]] U1 GetSignFromOp(const Value& op); + [[nodiscard]] U1 GetCarryFromOp(const Value& op); + [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + [[nodiscard]] U1 GetSparseFromOp(const Value& op); + [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); + + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4); + [[nodiscard]] 
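+    // Composite values model small homogeneous vectors (x2/x3/x4) as single
+    // SSA values. A construct/extract pair with a constant index folds away in
+    // the constant-propagation pass; sketch, given an emitter `ir`:
+    //     const IR::Value v{ir.CompositeConstruct(ir.Imm32(1u), ir.Imm32(2u))};
+    //     const IR::U32 x{ir.CompositeExtract(v, 0)}; // folds to Imm32(1u)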
Value CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, + const Value& false_value); + + [[nodiscard]] U64 PackUint2x32(const Value& vector); + [[nodiscard]] Value UnpackUint2x32(const U64& value); + + [[nodiscard]] U32 PackFloat2x16(const Value& vector); + [[nodiscard]] Value UnpackFloat2x16(const U32& value); + + [[nodiscard]] U32 PackHalf2x16(const Value& vector); + [[nodiscard]] Value UnpackHalf2x16(const U32& value); + + [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b); + [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b); + [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c); + + [[nodiscard]] F32F64 FPAbs(const F32F64& value); + [[nodiscard]] F32F64 FPNeg(const F32F64& value); + [[nodiscard]] F32F64 FPAbsNeg(const F32F64& value, bool abs, bool neg); + + [[nodiscard]] F32 FPCos(const F32& value); + [[nodiscard]] F32 FPSin(const F32& value); + [[nodiscard]] F32 FPExp2(const F32& value); + [[nodiscard]] F32 FPLog2(const F32& value); + [[nodiscard]] F32F64 FPRecip(const F32F64& value); + [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); + [[nodiscard]] F32 FPSqrt(const F32& value); + [[nodiscard]] F32F64 FPSaturate(const F32F64& value); + [[nodiscard]] F32F64 FPClamp(const F32F64& value, const F32F64& min_value, + const F32F64& max_value); + [[nodiscard]] F32F64 FPRoundEven(const F32F64& value); + [[nodiscard]] F32F64 FPFloor(const F32F64& value); + [[nodiscard]] F32F64 FPCeil(const F32F64& value); + [[nodiscard]] F32F64 FPTrunc(const F32F64& value); + + [[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThanEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPGreaterThanEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPIsNan(const F32F64& value); + [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs); + [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs); + + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); + [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); + [[nodiscard]] U32U64 INeg(const U32U64& value); + [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); + [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); + [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count); + [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const 
U32& count, + bool is_signed = false); + [[nodiscard]] U32 BitReverse(const U32& value); + [[nodiscard]] U32 BitCount(const U32& value); + [[nodiscard]] U32 BitwiseNot(const U32& value); + + [[nodiscard]] U32 FindSMsb(const U32& value); + [[nodiscard]] U32 FindUMsb(const U32& value); + [[nodiscard]] U32 SMin(const U32& a, const U32& b); + [[nodiscard]] U32 UMin(const U32& a, const U32& b); + [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SMax(const U32& a, const U32& b); + [[nodiscard]] U32 UMax(const U32& a, const U32& b); + [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); + [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); + + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); + [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalNot(const U1& value); + + [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F32F64& value); + [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F32F64& value); + [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F32F64& value); + [[nodiscard]] F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value); + [[nodiscard]] F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value); + [[nodiscard]] F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value); + + [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); + + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, + const F32& lod, const Value& offset, + TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& lod, + const Value& offset, TextureInstInfo info); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips, TextureInstInfo info); + + [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, + TextureInstInfo info); + [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info); + [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, + const Value& offset, const Value& offset2, const F32& dref, + TextureInstInfo info); + [[nodiscard]] 
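+    // Image operations pass the image/sampler handle and coordinates as opaque
+    // operands; per-instruction modifiers travel in TextureInstInfo, which is
+    // packed into the instruction's 32-bit flags word via Inst(op, Flags{info},
+    // ...) below rather than occupying an operand slot.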
Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info); + [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, + const Value& derivatives, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); + void ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info); + +private: + IR::Block::iterator insertion_point; + + template + T Inst(Opcode op, Args... args) { + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; + return T{Value{&*it}}; + } + + template + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v) + struct Flags { + Flags() = default; + Flags(T proxy_) : proxy{proxy_} {} + + T proxy; + }; + + template + T Inst(Opcode op, Flags flags, Args... args) { + u32 raw_flags{}; + std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + return T{Value{&*it}}; + } +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp new file mode 100644 index 00000000..5e616b53 --- /dev/null +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -0,0 +1,167 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/type.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { + +Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { + if (op == Opcode::Phi) { + std::construct_at(&phi_args); + } else { + std::construct_at(&args); + } +} + +Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} { + if (base.op == Opcode::Phi) { + throw NotImplementedException("Copying phi node"); + } + std::construct_at(&args); + const size_t num_args{base.NumArgs()}; + for (size_t index = 0; index < num_args; ++index) { + SetArg(index, base.Arg(index)); + } +} + +Inst::~Inst() { + if (op == Opcode::Phi) { + std::destroy_at(&phi_args); + } else { + std::destroy_at(&args); + } +} + +bool Inst::MayHaveSideEffects() const noexcept { + switch (op) { + case Opcode::ConditionRef: + case Opcode::Reference: + case Opcode::PhiMove: + case Opcode::Prologue: + case Opcode::Epilogue: + // case Opcode::Join: + // case Opcode::Barrier: + // case Opcode::WorkgroupMemoryBarrier: + // case Opcode::DeviceMemoryBarrier: + // case Opcode::EmitVertex: + // case Opcode::EndPrimitive: + case Opcode::SetAttribute: + // case Opcode::SetFragColor: + // case Opcode::SetFragDepth: + return true; + default: + return false; + } +} + +bool Inst::AreAllArgsImmediates() const { + if (op == Opcode::Phi) { + throw LogicError("Testing for all arguments are immediates on phi instruction"); + } + return std::all_of(args.begin(), args.begin() + NumArgs(), + [](const IR::Value& value) { return value.IsImmediate(); }); +} + +IR::Type Inst::Type() const { + return TypeOf(op); +} + +void Inst::SetArg(size_t index, Value value) { + if (index >= NumArgs()) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + const IR::Value arg{Arg(index)}; + if (!arg.IsImmediate()) { + UndoUse(arg); + } + if (!value.IsImmediate()) { + Use(value); + 
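+        // SetArg() keeps the use_count bookkeeping symmetric: the previous
+        // argument's use was released above and the new argument gains one
+        // here, so producers always know how many live consumers they have.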
+    }
+    if (op == Opcode::Phi) {
+        phi_args[index].second = value;
+    } else {
+        args[index] = value;
+    }
+}
+
+Block* Inst::PhiBlock(size_t index) const {
+    if (op != Opcode::Phi) {
+        throw LogicError("{} is not a Phi instruction", op);
+    }
+    if (index >= phi_args.size()) {
+        throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
+    }
+    return phi_args[index].first;
+}
+
+void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    phi_args.emplace_back(predecessor, value);
+}
+
+void Inst::Invalidate() {
+    ClearArgs();
+    ReplaceOpcode(Opcode::Void);
+}
+
+void Inst::ClearArgs() {
+    if (op == Opcode::Phi) {
+        for (auto& pair : phi_args) {
+            IR::Value& value{pair.second};
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
+        }
+        phi_args.clear();
+    } else {
+        for (auto& value : args) {
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
+        }
+        // Reset arguments to null
+        // std::memset was measured to be faster on MSVC than std::ranges::fill
+        std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
+    }
+}
+
+void Inst::ReplaceUsesWith(Value replacement) {
+    Invalidate();
+    ReplaceOpcode(Opcode::Identity);
+    if (!replacement.IsImmediate()) {
+        Use(replacement);
+    }
+    args[0] = replacement;
+}
+
+void Inst::ReplaceOpcode(IR::Opcode opcode) {
+    if (opcode == IR::Opcode::Phi) {
+        throw LogicError("Cannot transition into Phi");
+    }
+    if (op == Opcode::Phi) {
+        // Transition out of phi arguments into non-phi
+        std::destroy_at(&phi_args);
+        std::construct_at(&args);
+    }
+    op = opcode;
+}
+
+void Inst::Use(const Value& value) {
+    Inst* const inst{value.Inst()};
+    ++inst->use_count;
+}
+
+void Inst::UndoUse(const Value& value) {
+    Inst* const inst{value.Inst()};
+    --inst->use_count;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/opcodes.cpp b/src/shader_recompiler/ir/opcodes.cpp
new file mode 100644
index 00000000..18f82cb8
--- /dev/null
+++ b/src/shader_recompiler/ir/opcodes.cpp
@@ -0,0 +1,12 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/ir/opcodes.h"
+
+namespace Shader::IR {
+
+std::string_view NameOf(Opcode op) {
+    return Detail::META_TABLE[static_cast<size_t>(op)].name;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h
new file mode 100644
index 00000000..66b60221
--- /dev/null
+++ b/src/shader_recompiler/ir/opcodes.h
@@ -0,0 +1,107 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <string_view>
+#include <fmt/format.h>
+#include "common/types.h"
+#include "shader_recompiler/ir/type.h"
+
+namespace Shader::IR {
+
+enum class Opcode {
+#define OPCODE(name, ...) name,
+#include "opcodes.inc"
+#undef OPCODE
+};
+
+namespace Detail {
+struct OpcodeMeta {
+    std::string_view name;
+    Type type;
+    std::array<Type, 5> arg_types;
+};
+
+// using enum Type;
+constexpr Type Void{Type::Void};
+constexpr Type Opaque{Type::Opaque};
+constexpr Type ScalarReg{Type::ScalarReg};
+constexpr Type VectorReg{Type::VectorReg};
+constexpr Type Attribute{Type::Attribute};
+constexpr Type SystemValue{Type::SystemValue};
+constexpr Type U1{Type::U1};
+constexpr Type U8{Type::U8};
+constexpr Type U16{Type::U16};
+constexpr Type U32{Type::U32};
+constexpr Type U64{Type::U64};
+constexpr Type F16{Type::F16};
+constexpr Type F32{Type::F32};
+constexpr Type F64{Type::F64};
+constexpr Type U32x2{Type::U32x2};
+constexpr Type U32x3{Type::U32x3};
+constexpr Type U32x4{Type::U32x4};
+constexpr Type F16x2{Type::F16x2};
+constexpr Type F16x3{Type::F16x3};
+constexpr Type F16x4{Type::F16x4};
+constexpr Type F32x2{Type::F32x2};
+constexpr Type F32x3{Type::F32x3};
+constexpr Type F32x4{Type::F32x4};
+constexpr Type F64x2{Type::F64x2};
+constexpr Type F64x3{Type::F64x3};
+constexpr Type F64x4{Type::F64x4};
+
+constexpr OpcodeMeta META_TABLE[]{
+#define OPCODE(name_token, type_token, ...)                                                        \
+    {                                                                                              \
+        .name{#name_token},                                                                        \
+        .type = type_token,                                                                        \
+        .arg_types{__VA_ARGS__},                                                                   \
+    },
+#include "opcodes.inc"
+#undef OPCODE
+};
+constexpr size_t CalculateNumArgsOf(Opcode op) {
+    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+    return static_cast<size_t>(
+        std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
+}
+
+constexpr u8 NUM_ARGS[]{
+#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
+#include "opcodes.inc"
+#undef OPCODE
+};
+} // namespace Detail
+
+/// Get return type of an opcode
+[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].type;
+}
+
+/// Get the number of arguments an opcode accepts
+[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
+    return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
+}
+
+/// Get the required type of an argument of an opcode
+[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
+}
+
+/// Get the name of an opcode
+[[nodiscard]] std::string_view NameOf(Opcode op);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Opcode> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Opcode& op, FormatContext& ctx) const {
+        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
+    }
+};
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
new file mode 100644
index 00000000..59687707
--- /dev/null
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -0,0 +1,247 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
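+// Each row below is consumed three times through the X-macro includes in
+// opcodes.h: once for the Opcode enum, once for Detail::META_TABLE (name
+// string, result type, argument types), and once for Detail::NUM_ARGS. Unused
+// trailing argument slots default to Void, which is how CalculateNumArgsOf()
+// derives an opcode's arity. For example,
+//     OPCODE(IAdd32, U32, U32, U32, )
+// registers a two-operand 32-bit integer add returning U32.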
+OPCODE(Phi, Opaque, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) +OPCODE(ConditionRef, U1, U1, ) +OPCODE(Reference, Void, Opaque, ) +OPCODE(PhiMove, Void, Opaque, Opaque, ) + +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) + +// Constant memory operations +OPCODE(ReadConst, U32, U64, U32, ) +OPCODE(ReadConstBuffer, U32, Opaque, U32, U32 ) +OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 ) + +// Context getters/setters +OPCODE(GetScalarRegister, U32, ScalarReg, ) +OPCODE(SetScalarRegister, Void, ScalarReg, U32, ) +OPCODE(GetVectorRegister, U32, VectorReg, ) +OPCODE(SetVectorRegister, Void, VectorReg, U32, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(GetAttributeU32, U32, Attribute, U32, ) +OPCODE(SetAttribute, Void, Attribute, F32, U32, ) + +// Flags +//OPCODE(GetScc, U1, Void, ) +OPCODE(GetVcc, U1, Void, ) +//OPCODE(SetScc, Void, U1, ) +OPCODE(SetVcc, Void, U1, ) + +// Undefined +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) + +// Vector utility +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) + +// Select operations +OPCODE(SelectU1, U1, U1, U1, U1, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectF64, F64, U1, F64, F64, ) + +// Bitwise conversions +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) 
+OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) + +// Floating-point operations +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPClamp32, F32, F32, F32, F32, ) +OPCODE(FPClamp64, F64, F64, F64, F64, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) + +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan32, U1, F32, ) +OPCODE(FPIsNan64, U1, F64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(SDiv32, U32, U32, U32, ) +OPCODE(UDiv32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, U64, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) 
+OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) + +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SClamp32, U32, U32, U32, U32, ) +OPCODE(UClamp32, U32, U32, U32, U32, ) +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) + +// Logical operations +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) + +// Conversion operations +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF32S32, F32, U32, ) +OPCODE(ConvertF32U32, F32, U32, ) +OPCODE(ConvertF64S32, F64, U32, ) +OPCODE(ConvertF64U32, F64, U32, ) + +// Image operations +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) +OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) diff --git a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp new file mode 100644 index 00000000..40ee1c2b --- /dev/null +++ b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp @@ -0,0 +1,403 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include "common/func_traits.h" +#include "shader_recompiler/ir/basic_block.h" + +namespace Shader::Optimization { + +template +[[nodiscard]] T Arg(const IR::Value& value) { + if constexpr (std::is_same_v) { + return value.U1(); + } else if constexpr (std::is_same_v) { + return value.U32(); + } else if constexpr (std::is_same_v) { + return static_cast(value.U32()); + } else if constexpr (std::is_same_v) { + return value.F32(); + } else if constexpr (std::is_same_v) { + return value.U64(); + } +} + +template +IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { + using Traits = Common::LambdaTraits; + return IR::Value{func(Arg>(inst.Arg(I))...)}; +} + +template +bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { + const IR::Value lhs{inst.Arg(0)}; + const IR::Value rhs{inst.Arg(1)}; + + const bool is_lhs_immediate{lhs.IsImmediate()}; + const bool is_rhs_immediate{rhs.IsImmediate()}; + + if (is_lhs_immediate && is_rhs_immediate) { + const auto result{imm_fn(Arg(lhs), Arg(rhs))}; + 
+template <typename T, typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+    const IR::Value lhs{inst.Arg(0)};
+    const IR::Value rhs{inst.Arg(1)};
+
+    const bool is_lhs_immediate{lhs.IsImmediate()};
+    const bool is_rhs_immediate{rhs.IsImmediate()};
+
+    if (is_lhs_immediate && is_rhs_immediate) {
+        const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
+        inst.ReplaceUsesWith(IR::Value{result});
+        return false;
+    }
+    if (is_lhs_immediate && !is_rhs_immediate) {
+        IR::Inst* const rhs_inst{rhs.InstRecursive()};
+        if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
+            inst.SetArg(0, rhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        } else {
+            // Normalize
+            inst.SetArg(0, rhs);
+            inst.SetArg(1, lhs);
+        }
+    }
+    if (!is_lhs_immediate && is_rhs_immediate) {
+        const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+        if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
+            inst.SetArg(0, lhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        }
+    }
+    return true;
+}
+
+template <typename Func>
+bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
+    if (!inst.AreAllArgsImmediates() /*|| inst.HasAssociatedPseudoOperation()*/) {
+        return false;
+    }
+    using Indices = std::make_index_sequence<Common::LambdaTraits<decltype(func)>::NUM_ARGS>;
+    inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
+    return true;
+}
+
+template <IR::Opcode op, typename Dest, typename Source>
+void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
+    const IR::Value value{inst.Arg(0)};
+    if (value.IsImmediate()) {
+        inst.ReplaceUsesWith(IR::Value{std::bit_cast<Dest>(Arg<Source>(value))});
+        return;
+    }
+    IR::Inst* const arg_inst{value.InstRecursive()};
+    if (arg_inst->GetOpcode() == reverse) {
+        inst.ReplaceUsesWith(arg_inst->Arg(0));
+        return;
+    }
+    if constexpr (op == IR::Opcode::BitCastF32U32) {
+        if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
+            // Replace the bitcast with a typed constant buffer read
+            inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
+            inst.SetArg(0, arg_inst->Arg(0));
+            inst.SetArg(1, arg_inst->Arg(1));
+            return;
+        }
+    }
+}
+
+std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
+                                                  IR::Opcode construct, u32 first_index) {
+    IR::Inst* const inst{inst_value.InstRecursive()};
+    if (inst->GetOpcode() == construct) {
+        return inst->Arg(first_index);
+    }
+    if (inst->GetOpcode() != insert) {
+        return std::nullopt;
+    }
+    IR::Value value_index{inst->Arg(2)};
+    if (!value_index.IsImmediate()) {
+        return std::nullopt;
+    }
+    const u32 second_index{value_index.U32()};
+    if (first_index != second_index) {
+        IR::Value value_composite{inst->Arg(0)};
+        if (value_composite.IsImmediate()) {
+            return std::nullopt;
+        }
+        return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
+    }
+    return inst->Arg(1);
+}
+
+void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
+    const IR::Value value_1{inst.Arg(0)};
+    const IR::Value value_2{inst.Arg(1)};
+    if (value_1.IsImmediate()) {
+        return;
+    }
+    if (!value_2.IsImmediate()) {
+        return;
+    }
+    const u32 first_index{value_2.U32()};
+    const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
+    if (!result) {
+        return;
+    }
+    inst.ReplaceUsesWith(*result);
+}
+
+void FoldConvert(IR::Inst& inst, IR::Opcode opposite) {
+    const IR::Value value{inst.Arg(0)};
+    if (value.IsImmediate()) {
+        return;
+    }
+    IR::Inst* const producer{value.InstRecursive()};
+    if (producer->GetOpcode() == opposite) {
+        inst.ReplaceUsesWith(producer->Arg(0));
+    }
+}
+
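+// FoldConvert above and FoldInverseFunc below cancel exact round-trips through a producer:
+//     %1 = ConvertF16F32 %0
+//     %2 = ConvertF32F16 %1   ->   uses of %2 are rewritten to read %0
+// The same cancellation applies to the PackHalf2x16/UnpackHalf2x16-style pairs dispatched in
+// ConstantPropagation.
+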
+void FoldLogicalAnd(IR::Inst& inst) {
+    if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate()) {
+        if (rhs.U1()) {
+            inst.ReplaceUsesWith(inst.Arg(0));
+        } else {
+            inst.ReplaceUsesWith(IR::Value{false});
+        }
+    }
+}
+
+void FoldSelect(IR::Inst& inst) {
+    const IR::Value cond{inst.Arg(0)};
+    if (cond.IsImmediate()) {
+        inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
+    }
+}
+
+void FoldLogicalOr(IR::Inst& inst) {
+    if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate()) {
+        if (rhs.U1()) {
+            inst.ReplaceUsesWith(IR::Value{true});
+        } else {
+            inst.ReplaceUsesWith(inst.Arg(0));
+        }
+    }
+}
+
+void FoldLogicalNot(IR::Inst& inst) {
+    const IR::U1 value{inst.Arg(0)};
+    if (value.IsImmediate()) {
+        inst.ReplaceUsesWith(IR::Value{!value.U1()});
+        return;
+    }
+    IR::Inst* const arg{value.InstRecursive()};
+    if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
+        inst.ReplaceUsesWith(arg->Arg(0));
+    }
+}
+
+void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
+    const IR::Value value{inst.Arg(0)};
+    if (value.IsImmediate()) {
+        return;
+    }
+    IR::Inst* const arg_inst{value.InstRecursive()};
+    if (arg_inst->GetOpcode() == reverse) {
+        inst.ReplaceUsesWith(arg_inst->Arg(0));
+        return;
+    }
+}
+
+template <typename T>
+void FoldAdd(IR::Block& block, IR::Inst& inst) {
+    if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+        inst.ReplaceUsesWith(inst.Arg(0));
+        return;
+    }
+}
+
+template <u32 idx>
+bool IsArgImm(const IR::Inst& inst, u32 imm) {
+    const IR::Value& arg = inst.Arg(idx);
+    return arg.IsImmediate() && arg.U32() == imm;
+}
+
+void FoldBooleanConvert(IR::Inst& inst) {
+    // Eliminate pattern
+    // %4 = <some boolean value>
+    // %5 = SelectU32 %4, #1, #0 (uses: 2)
+    // %8 = INotEqual %5, #0 (uses: 1)
+    if (!IsArgImm<1>(inst, 0)) {
+        return;
+    }
+    IR::Inst* prod = inst.Arg(0).TryInstRecursive();
+    if (!prod || prod->GetOpcode() != IR::Opcode::SelectU32) {
+        return;
+    }
+    if (IsArgImm<1>(*prod, 1) && IsArgImm<2>(*prod, 0)) {
+        inst.ReplaceUsesWith(prod->Arg(0));
+    }
+}
+
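+// FoldCommutative<T> also reassociates chains whose inner operand is an immediate, e.g. for
+// IAdd32:
+//     %1 = IAdd32 %0, #4
+//     %2 = IAdd32 #8, %1   ->   %2 = IAdd32 %0, #12
+// and otherwise swaps a lone immediate onto the right-hand side, so the dispatch below only
+// has to inspect Arg(1) for the remaining patterns.
+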
+void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::IAdd32:
+        return FoldAdd<u32>(block, inst);
+    case IR::Opcode::IMul32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
+        return;
+    case IR::Opcode::ShiftRightArithmetic32:
+        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
+        return;
+    case IR::Opcode::BitCastF32U32:
+        return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
+    case IR::Opcode::BitCastU32F32:
+        return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
+    case IR::Opcode::PackHalf2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
+    case IR::Opcode::UnpackHalf2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
+    case IR::Opcode::PackFloat2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
+    case IR::Opcode::UnpackFloat2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
+    case IR::Opcode::SelectU1:
+    case IR::Opcode::SelectU8:
+    case IR::Opcode::SelectU16:
+    case IR::Opcode::SelectU32:
+    case IR::Opcode::SelectU64:
+    case IR::Opcode::SelectF32:
+    case IR::Opcode::SelectF64:
+        return FoldSelect(inst);
+    case IR::Opcode::FPNeg32:
+        FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
+        return;
+    case IR::Opcode::LogicalAnd:
+        return FoldLogicalAnd(inst);
+    case IR::Opcode::LogicalOr:
+        return FoldLogicalOr(inst);
+    case IR::Opcode::LogicalNot:
+        return FoldLogicalNot(inst);
+    case IR::Opcode::SLessThan:
+        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
+        return;
+    case IR::Opcode::ULessThan:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
+        return;
+    case IR::Opcode::SLessThanEqual:
+        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
+        return;
+    case IR::Opcode::ULessThanEqual:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
+        return;
+    case IR::Opcode::SGreaterThan:
+        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
+        return;
+    case IR::Opcode::UGreaterThan:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
+        return;
+    case IR::Opcode::SGreaterThanEqual:
+        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
+        return;
+    case IR::Opcode::UGreaterThanEqual:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
+        return;
+    case IR::Opcode::IEqual:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
+        return;
+    case IR::Opcode::INotEqual:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
+        FoldBooleanConvert(inst);
+        return;
+    case IR::Opcode::BitwiseAnd32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
+        return;
+    case IR::Opcode::BitwiseOr32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
+        return;
+    case IR::Opcode::BitwiseXor32:
+        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
+        return;
+    case IR::Opcode::BitFieldUExtract:
+        FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
+            if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
+                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
+                                 base, shift, count);
+            }
+            return (base >> shift) & ((1U << count) - 1);
+        });
+        return;
+    case IR::Opcode::BitFieldSExtract:
+        FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
+            const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
+            const size_t left_shift{32 - back_shift};
+            const size_t right_shift{static_cast<size_t>(32 - count)};
+            if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
+                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
+                                 base, shift, count);
+            }
+            return static_cast<u32>((base << left_shift) >> right_shift);
+        });
+        return;
+    case IR::Opcode::BitFieldInsert:
+        FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
+            if (bits >= 32 || offset >= 32) {
+                throw LogicError("Undefined result in {}({}, {}, {}, {})",
+                                 IR::Opcode::BitFieldInsert, base, insert, offset, bits);
+            }
+            return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
+        });
+        return;
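+    // Worked example for the bit-field folds above: BitFieldUExtract(#0xABCD1234, #8, #8)
+    // evaluates (0xABCD1234 >> 8) & ((1u << 8) - 1) = #0x12 at compile time, while an
+    // out-of-range shift/count combination throws LogicError instead of producing an
+    // undefined result.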
+    case IR::Opcode::CompositeExtractU32x2:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
+                                    IR::Opcode::CompositeInsertU32x2);
+    case IR::Opcode::CompositeExtractU32x3:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
+                                    IR::Opcode::CompositeInsertU32x3);
+    case IR::Opcode::CompositeExtractU32x4:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
+                                    IR::Opcode::CompositeInsertU32x4);
+    case IR::Opcode::CompositeExtractF32x2:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
+                                    IR::Opcode::CompositeInsertF32x2);
+    case IR::Opcode::CompositeExtractF32x3:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
+                                    IR::Opcode::CompositeInsertF32x3);
+    case IR::Opcode::CompositeExtractF32x4:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
+                                    IR::Opcode::CompositeInsertF32x4);
+    case IR::Opcode::CompositeExtractF16x2:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
+                                    IR::Opcode::CompositeInsertF16x2);
+    case IR::Opcode::CompositeExtractF16x3:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
+                                    IR::Opcode::CompositeInsertF16x3);
+    case IR::Opcode::CompositeExtractF16x4:
+        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
+                                    IR::Opcode::CompositeInsertF16x4);
+    case IR::Opcode::ConvertF32F16:
+        return FoldConvert(inst, IR::Opcode::ConvertF16F32);
+    case IR::Opcode::ConvertF16F32:
+        return FoldConvert(inst, IR::Opcode::ConvertF32F16);
+    default:
+        break;
+    }
+}
+
+void ConstantPropagationPass(IR::BlockList& program) {
+    const auto end{program.rend()};
+    for (auto it = program.rbegin(); it != end; ++it) {
+        IR::Block* const block{*it};
+        for (IR::Inst& inst : block->Instructions()) {
+            ConstantPropagation(*block, inst);
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir/passes/passes.h b/src/shader_recompiler/ir/passes/passes.h
new file mode 100644
index 00000000..49bb09b1
--- /dev/null
+++ b/src/shader_recompiler/ir/passes/passes.h
@@ -0,0 +1,16 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "shader_recompiler/ir/basic_block.h"
+
+namespace Shader::Optimization {
+
+void SsaRewritePass(IR::BlockList& program);
+void IdentityRemovalPass(IR::BlockList& program);
+void DeadCodeEliminationPass(IR::BlockList& program);
+void ConstantPropagationPass(IR::BlockList& program);
+void ResourceTrackingPass(IR::BlockList& program);
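+
+// The passes are not order-independent: recompiler.cpp runs SsaRewritePass first (replacing
+// Get/Set register and flag opcodes with SSA values), then ConstantPropagationPass on the
+// SSA form, then IdentityRemovalPass to collapse the Identity chains left behind by
+// ReplaceUsesWith, and DeadCodeEliminationPass last.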
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
new file mode 100644
index 00000000..feb213df
--- /dev/null
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -0,0 +1,131 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include <compare>
+#include <optional>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/ir_emitter.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Optimization {
+namespace {
+
+struct SharpLocation {
+    IR::ScalarReg eud_ptr;
+    u32 index_dwords;
+
+    auto operator<=>(const SharpLocation&) const = default;
+};
+
+bool IsResourceInstruction(const IR::Inst& inst) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::ReadConstBuffer:
+    case IR::Opcode::ReadConstBufferF32:
+    case IR::Opcode::ImageSampleExplicitLod:
+    case IR::Opcode::ImageSampleImplicitLod:
+    case IR::Opcode::ImageSampleDrefExplicitLod:
+    case IR::Opcode::ImageSampleDrefImplicitLod:
+    case IR::Opcode::ImageFetch:
+    case IR::Opcode::ImageGather:
+    case IR::Opcode::ImageGatherDref:
+    case IR::Opcode::ImageQueryDimensions:
+    case IR::Opcode::ImageQueryLod:
+    case IR::Opcode::ImageGradient:
+    case IR::Opcode::ImageRead:
+    case IR::Opcode::ImageWrite:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/*class Descriptors {
+public:
+    explicit Descriptors(TextureDescriptors& texture_descriptors_)
+        : texture_descriptors{texture_descriptors_} {}
+
+    u32 Add(const TextureDescriptor& desc) {
+        const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
+            return desc.type == existing.type && desc.is_depth == existing.is_depth &&
+                   desc.has_secondary == existing.has_secondary &&
+                   desc.cbuf_index == existing.cbuf_index &&
+                   desc.cbuf_offset == existing.cbuf_offset &&
+                   desc.shift_left == existing.shift_left &&
+                   desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
+                   desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+                   desc.secondary_shift_left == existing.secondary_shift_left &&
+                   desc.count == existing.count && desc.size_shift == existing.size_shift;
+        })};
+        // TODO: Read this from TIC
+        texture_descriptors[index].is_multisample |= desc.is_multisample;
+        return index;
+    }
+
+private:
+    template <typename Descriptors, typename Descriptor, typename Func>
+    static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+        // TODO: Handle arrays
+        const auto it{std::ranges::find_if(descriptors, pred)};
+        if (it != descriptors.end()) {
+            return static_cast<u32>(std::distance(descriptors.begin(), it));
+        }
+        descriptors.push_back(desc);
+        return static_cast<u32>(descriptors.size()) - 1;
+    }
+
+    TextureDescriptors& texture_descriptors;
+};*/
+
+} // Anonymous namespace
+
+SharpLocation TrackSharp(const IR::Value& handle) {
+    IR::Inst* inst = handle.InstRecursive();
+    if (inst->GetOpcode() == IR::Opcode::GetScalarRegister) {
+        return SharpLocation{
+            .eud_ptr = IR::ScalarReg::Max,
+            .index_dwords = inst->Arg(0).U32(),
+        };
+    }
+    ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
+
+    // Retrieve offset from base.
+    IR::Inst* addr = inst->Arg(0).InstRecursive();
+    u32 dword_offset = addr->Arg(1).U32();
+    addr = addr->Arg(0).InstRecursive();
+    ASSERT_MSG(addr->Arg(1).IsImmediate(), "Bindless not supported");
+    dword_offset += addr->Arg(1).U32() >> 2;
+
+    // Retrieve SGPR that holds sbase
+    inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
+    ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetScalarRegister,
+               "Nested resource loads not supported");
+    const IR::ScalarReg base = inst->Arg(0).ScalarReg();
+
+    // Return retrieved location.
+    return SharpLocation{
+        .eud_ptr = base,
+        .index_dwords = dword_offset,
+    };
+}
+
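+// TrackSharp answers "where does this sharp descriptor live?": either directly in a scalar
+// register (GetScalarRegister, reported with eud_ptr == ScalarReg::Max and the register
+// index in index_dwords), or behind an S_LOAD-style ReadConst, in which case the two
+// immediate offsets along the address chain are accumulated into index_dwords and eud_ptr
+// names the base SGPR of the load.
+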
+void ResourceTrackingPass(IR::BlockList& program) {
+    for (IR::Block* const block : program) {
+        for (IR::Inst& inst : block->Instructions()) {
+            if (!IsResourceInstruction(inst)) {
+                continue;
+            }
+            IR::Inst* producer = inst.Arg(0).InstRecursive();
+            const auto loc = TrackSharp(producer->Arg(0));
+            fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,
+                       loc.index_dwords + 4, loc.eud_ptr != IR::ScalarReg::Max);
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
new file mode 100644
index 00000000..d267465f
--- /dev/null
+++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
@@ -0,0 +1,408 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// This file implements the SSA rewriting algorithm proposed in
+//
+//      Simple and Efficient Construction of Static Single Assignment Form.
+//      Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
+//      In: Jhala R., De Bosschere K. (eds)
+//      Compiler Construction. CC 2013.
+//      Lecture Notes in Computer Science, vol 7791.
+//      Springer, Berlin, Heidelberg
+//
+// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
+//
+
+#include <map>
+#include <span>
+#include <unordered_map>
+#include <variant>
+#include <vector>
+
+#include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/ir_emitter.h"
+#include "shader_recompiler/ir/opcodes.h"
+#include "shader_recompiler/ir/reg.h"
+#include "shader_recompiler/ir/value.h"
+
+namespace Shader::Optimization {
+namespace {
+struct FlagTag {
+    auto operator<=>(const FlagTag&) const noexcept = default;
+};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+struct VccFlagTag : FlagTag {};
+
+struct GotoVariable : FlagTag {
+    GotoVariable() = default;
+    explicit GotoVariable(u32 index_) : index{index_} {}
+
+    auto operator<=>(const GotoVariable&) const noexcept = default;
+
+    u32 index;
+};
+
+using Variant = std::variant<IR::ScalarReg, IR::VectorReg, ZeroFlagTag, SignFlagTag,
+                             CarryFlagTag, OverflowFlagTag, VccFlagTag, GotoVariable>;
+using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
+
+struct DefTable {
+    const IR::Value& Def(IR::Block* block, IR::ScalarReg variable) {
+        return block->ssa_sreg_values[RegIndex(variable)];
+    }
+    void SetDef(IR::Block* block, IR::ScalarReg variable, const IR::Value& value) {
+        block->ssa_sreg_values[RegIndex(variable)] = value;
+    }
+
+    const IR::Value& Def(IR::Block* block, IR::VectorReg variable) {
+        return block->ssa_vreg_values[RegIndex(variable)];
+    }
+    void SetDef(IR::Block* block, IR::VectorReg variable, const IR::Value& value) {
+        block->ssa_vreg_values[RegIndex(variable)] = value;
+    }
+
+    const IR::Value& Def(IR::Block* block, GotoVariable variable) {
+        return goto_vars[variable.index][block];
+    }
+    void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
+        goto_vars[variable.index].insert_or_assign(block, value);
+    }
+
+    const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
+        return zero_flag[block];
+    }
+    void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
+        zero_flag.insert_or_assign(block, value);
+    }
+
+    const IR::Value& Def(IR::Block* block, SignFlagTag) {
+        return sign_flag[block];
+    }
+    void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
+        sign_flag.insert_or_assign(block, value);
+    }
+
+    const IR::Value& Def(IR::Block* block, CarryFlagTag) {
+        return carry_flag[block];
+    }
+    void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
+        carry_flag.insert_or_assign(block, value);
+    }
+
+    const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
+        return overflow_flag[block];
+    }
+    void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
+        overflow_flag.insert_or_assign(block, value);
+    }
+
+    const IR::Value& Def(IR::Block* block, VccFlagTag) {
+        return vcc_flag[block];
+    }
+    void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) {
+        vcc_flag.insert_or_assign(block, value);
+    }
+
+    std::unordered_map<u32, ValueMap> goto_vars;
+    ValueMap indirect_branch_var;
+    ValueMap zero_flag;
+    ValueMap sign_flag;
+    ValueMap carry_flag;
+    ValueMap overflow_flag;
+    ValueMap vcc_flag;
+};
+
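+// DefTable implements the paper's writeVariable/readVariable lookup: for every variable kind
+// (SGPR, VGPR, the status flags, goto variables) it stores the value that currently defines
+// that variable in a given block. Register defs live in arrays on the block itself; the
+// rarer flag and goto defs live in per-variable hash maps, e.g.
+//     current_def.SetDef(block, VccFlagTag{}, value);   // on SetVcc
+//     current_def.Def(block, VccFlagTag{});             // on GetVcc
+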
+IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
+IR::Opcode UndefOpcode(IR::VectorReg) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+    return IR::Opcode::UndefU1;
+}
+
+enum class Status {
+    Start,
+    SetValue,
+    PreparePhiArgument,
+    PushPhiArgument,
+};
+
+template <typename Type>
+struct ReadState {
+    ReadState(IR::Block* block_) : block{block_} {}
+    ReadState() = default;
+
+    IR::Block* block{};
+    IR::Value result{};
+    IR::Inst* phi{};
+    IR::Block* const* pred_it{};
+    IR::Block* const* pred_end{};
+    Status pc{Status::Start};
+};
+
+class Pass {
+public:
+    template <typename Type>
+    void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
+        current_def.SetDef(block, variable, value);
+    }
+
+    template <typename Type>
+    IR::Value ReadVariable(Type variable, IR::Block* root_block) {
+        boost::container::small_vector<ReadState<Type>, 64> stack{
+            ReadState<Type>(nullptr),
+            ReadState<Type>(root_block),
+        };
+        const auto prepare_phi_operand = [&] {
+            if (stack.back().pred_it == stack.back().pred_end) {
+                IR::Inst* const phi{stack.back().phi};
+                IR::Block* const block{stack.back().block};
+                const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
+                stack.pop_back();
+                stack.back().result = result;
+                WriteVariable(variable, block, result);
+            } else {
+                IR::Block* const imm_pred{*stack.back().pred_it};
+                stack.back().pc = Status::PushPhiArgument;
+                stack.emplace_back(imm_pred);
+            }
+        };
+        do {
+            IR::Block* const block{stack.back().block};
+            switch (stack.back().pc) {
+            case Status::Start: {
+                if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
+                    stack.back().result = def;
+                } else if (!block->IsSsaSealed()) {
+                    // Incomplete CFG
+                    IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
+                    incomplete_phis[block].insert_or_assign(variable, phi);
+                    stack.back().result = IR::Value{&*phi};
+                } else if (const std::span imm_preds = block->ImmPredecessors();
+                           imm_preds.size() == 1) {
+                    // Optimize the common case of one predecessor: no phi needed
+                    stack.back().pc = Status::SetValue;
+                    stack.emplace_back(imm_preds.front());
+                    break;
+                } else {
+                    // Break potential cycles with operandless phi
+                    IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
+                    WriteVariable(variable, block, IR::Value{phi});
+
+                    stack.back().phi = phi;
+                    stack.back().pred_it = imm_preds.data();
+                    stack.back().pred_end = imm_preds.data() + imm_preds.size();
+                    prepare_phi_operand();
+                    break;
+                }
+            }
+                [[fallthrough]];
+            case Status::SetValue: {
+                const IR::Value result{stack.back().result};
+                WriteVariable(variable, block, result);
+                stack.pop_back();
+                stack.back().result = result;
+                break;
+            }
+            case Status::PushPhiArgument: {
+                IR::Inst* const phi{stack.back().phi};
+                phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
+                ++stack.back().pred_it;
+            }
+                [[fallthrough]];
+            case Status::PreparePhiArgument:
+                prepare_phi_operand();
+                break;
+            }
+        } while (stack.size() > 1);
+        return stack.back().result;
+    }
+
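+    // ReadVariable is the paper's recursive readVariable flattened onto an explicit stack so
+    // that long predecessor chains cannot overflow the host stack. The recursion it encodes
+    // is roughly (NewPhi/NewIncompletePhi stand in for the Phi-creation code above):
+    //     if (local def exists)    return it;
+    //     if (block not sealed)    return NewIncompletePhi(block, variable);
+    //     if (one predecessor)     return ReadVariable(variable, pred);
+    //     phi = NewPhi(block);     // breaks potential cycles
+    //     WriteVariable(variable, block, phi);
+    //     return AddPhiOperands(variable, *phi, block);
+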
+    void SealBlock(IR::Block* block) {
+        const auto it{incomplete_phis.find(block)};
+        if (it != incomplete_phis.end()) {
+            for (auto& pair : it->second) {
+                auto& variant{pair.first};
+                auto& phi{pair.second};
+                std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
+            }
+        }
+        block->SsaSeal();
+    }
+
+private:
+    template <typename Type>
+    IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
+        for (IR::Block* const imm_pred : block->ImmPredecessors()) {
+            phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
+        }
+        return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
+    }
+
+    IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
+        IR::Value same;
+        const size_t num_args{phi.NumArgs()};
+        for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
+            const IR::Value& op{phi.Arg(arg_index)};
+            if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
+                // Unique value or self-reference
+                continue;
+            }
+            if (!same.IsEmpty()) {
+                // The phi merges at least two values: not trivial
+                return IR::Value{&phi};
+            }
+            same = op;
+        }
+        // Remove the phi node from the block, it will be reinserted
+        IR::Block::InstructionList& list{block->Instructions()};
+        list.erase(IR::Block::InstructionList::s_iterator_to(phi));
+
+        // Find the first non-phi instruction and use it as an insertion point
+        IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
+        if (same.IsEmpty()) {
+            // The phi is unreachable or in the start block
+            // Insert an undefined instruction and make it the phi node replacement
+            // The "phi" node reinsertion point is specified after this instruction
+            reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
+            same = IR::Value{&*reinsert_point};
+            ++reinsert_point;
+        }
+        // Reinsert the phi node and reroute all its uses to the "same" value
+        list.insert(reinsert_point, phi);
+        phi.ReplaceUsesWith(same);
+        // TODO: Try to recursively remove all phi users, which might have become trivial
+        return same;
+    }
+
+    std::unordered_map<IR::Block*, std::map<Variant, IR::Inst*>> incomplete_phis;
+    DefTable current_def;
+};
+
+void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
+    const IR::Opcode opcode{inst.GetOpcode()};
+    switch (opcode) {
+    case IR::Opcode::SetScalarRegister: {
+        const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
+        pass.WriteVariable(reg, block, inst.Arg(1));
+        break;
+    }
+    case IR::Opcode::SetVectorRegister: {
+        const IR::VectorReg reg{inst.Arg(0).VectorReg()};
+        pass.WriteVariable(reg, block, inst.Arg(1));
+        break;
+    }
+    case IR::Opcode::SetGotoVariable:
+        pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
+        break;
+    case IR::Opcode::SetVcc:
+        pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
+        break;
+    // case IR::Opcode::SetSFlag:
+    //     pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
+    //     break;
+    // case IR::Opcode::SetCFlag:
+    //     pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
+    //     break;
+    // case IR::Opcode::SetOFlag:
+    //     pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
+    //     break;
+    case IR::Opcode::GetScalarRegister: {
+        const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
+        inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
+        break;
+    }
+    case IR::Opcode::GetVectorRegister: {
+        const IR::VectorReg reg{inst.Arg(0).VectorReg()};
+        inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
+        break;
+    }
+    case IR::Opcode::GetGotoVariable:
+        inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
+        break;
+    case IR::Opcode::GetVcc:
+        inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
+        break;
+    // case IR::Opcode::GetSFlag:
+    //     inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
+    //     break;
+    // case IR::Opcode::GetCFlag:
+    //     inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
+    //     break;
+    // case IR::Opcode::GetOFlag:
+    //     inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
+    //     break;
+    default:
+        break;
+    }
+}
+
+void VisitBlock(Pass& pass, IR::Block* block) {
+    for (IR::Inst& inst : block->Instructions()) {
+        VisitInst(pass, block, inst);
+    }
+    pass.SealBlock(block);
+}
+
+} // Anonymous namespace
+
+void SsaRewritePass(IR::BlockList& program) {
+    Pass pass;
+    const auto end{program.rend()};
+    for (auto block = program.rbegin(); block != end; ++block) {
+        VisitBlock(pass, *block);
+    }
+}
+
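+// Replacing an instruction's uses turns it into an Identity node rather than patching every
+// user in place, so after SSA rewriting the blocks can contain chains such as
+//     %2 = Identity %1
+//     %3 = IAdd32 %2, #1
+// IdentityRemovalPass below short-circuits every argument through those chains (%3 ends up
+// reading %1 directly) and then erases and invalidates the dead Identity instructions.
+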
+void IdentityRemovalPass(IR::BlockList& program) {
+    std::vector<IR::Inst*> to_invalidate;
+    for (IR::Block* const block : program) {
+        for (auto inst = block->begin(); inst != block->end();) {
+            const size_t num_args{inst->NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                IR::Value arg;
+                while ((arg = inst->Arg(i)).IsIdentity()) {
+                    inst->SetArg(i, arg.Inst()->Arg(0));
+                }
+            }
+            if (inst->GetOpcode() == IR::Opcode::Identity ||
+                inst->GetOpcode() == IR::Opcode::Void) {
+                to_invalidate.push_back(&*inst);
+                inst = block->Instructions().erase(inst);
+            } else {
+                ++inst;
+            }
+        }
+    }
+    for (IR::Inst* const inst : to_invalidate) {
+        inst->Invalidate();
+    }
+}
+
+void DeadCodeEliminationPass(IR::BlockList& program) {
+    // We iterate over the instructions in reverse order.
+    // This is because removing an instruction reduces the number of uses
+    // for earlier instructions.
+    for (IR::Block* const block : program) {
+        auto it{block->end()};
+        while (it != block->begin()) {
+            --it;
+            if (!it->HasUses() && !it->MayHaveSideEffects()) {
+                it->Invalidate();
+                it = block->Instructions().erase(it);
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir/post_order.cpp b/src/shader_recompiler/ir/post_order.cpp
new file mode 100644
index 00000000..5ab72aa2
--- /dev/null
+++ b/src/shader_recompiler/ir/post_order.cpp
@@ -0,0 +1,42 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+#include "shader_recompiler/ir/post_order.h"
+
+namespace Shader::IR {
+
+BlockList PostOrder(const AbstractSyntaxNode& root) {
+    boost::container::small_vector<Block*, 16> block_stack;
+    boost::container::flat_set<Block*> visited;
+    BlockList post_order_blocks;
+
+    if (root.type != AbstractSyntaxNode::Type::Block) {
+        throw LogicError("First node in abstract syntax list root is not a block");
+    }
+    Block* const first_block{root.data.block};
+    visited.insert(first_block);
+    block_stack.push_back(first_block);
+
+    while (!block_stack.empty()) {
+        Block* const block = block_stack.back();
+        const auto visit = [&](Block* branch) {
+            if (!visited.insert(branch).second) {
+                return false;
+            }
+            // Calling push_back twice is faster than insert on MSVC
+            block_stack.push_back(block);
+            block_stack.push_back(branch);
+            return true;
+        };
+        block_stack.pop_back();
+        if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
+            post_order_blocks.push_back(block);
+        }
+    }
+    return post_order_blocks;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/post_order.h b/src/shader_recompiler/ir/post_order.h
new file mode 100644
index 00000000..854acd69
--- /dev/null
+++ b/src/shader_recompiler/ir/post_order.h
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "shader_recompiler/ir/abstract_syntax_list.h"
+#include "shader_recompiler/ir/basic_block.h"
+
+namespace Shader::IR {
+
+BlockList PostOrder(const AbstractSyntaxNode& root);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/program.cpp b/src/shader_recompiler/ir/program.cpp
new file mode 100644
index 00000000..7728a3cc
--- /dev/null
+++ b/src/shader_recompiler/ir/program.cpp
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <map>
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/ir/value.h"
+
+namespace Shader::IR {
+
+std::string
DumpProgram(const Program& program) { + size_t index{0}; + std::map inst_to_index; + std::map block_to_index; + + for (const IR::Block* const block : program.blocks) { + block_to_index.emplace(block, index); + ++index; + } + std::string ret; + for (const auto& block : program.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h new file mode 100644 index 00000000..f4f5197f --- /dev/null +++ b/src/shader_recompiler/ir/program.h @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include "shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/ir/abstract_syntax_list.h" +#include "shader_recompiler/ir/basic_block.h" + +namespace Shader { +enum class Stage : u32; +} + +namespace Shader::IR { + +struct Program { + AbstractSyntaxList syntax_list; + BlockList blocks; + BlockList post_order_blocks; + std::vector ins_list; + Stage stage; +}; + +[[nodiscard]] std::string DumpProgram(const Program& program); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h new file mode 100644 index 00000000..721d5356 --- /dev/null +++ b/src/shader_recompiler/ir/reg.h @@ -0,0 +1,471 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/bit_field.h" +#include "common/types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class FpRoundMode : u32 { + NearestEven = 0, + PlusInf = 1, + MinInf = 2, + ToZero = 3, +}; + +enum class FpDenormMode : u32 { + InOutFlush = 0, + InAllowOutFlush = 1, + InFlushOutAllow = 2, + InOutAllow = 3, +}; + +union Mode { + BitField<0, 4, FpRoundMode> fp_round; + BitField<4, 2, FpDenormMode> fp_denorm_single; + BitField<6, 2, FpDenormMode> fp_denorm_double; + BitField<8, 1, u32> dx10_clamp; +}; + +union TextureInstInfo { + u32 raw; + BitField<0, 16, u32> descriptor_index; + BitField<19, 1, u32> is_depth; + BitField<20, 1, u32> has_bias; + BitField<21, 1, u32> has_lod_clamp; + BitField<22, 1, u32> relaxed_precision; + BitField<23, 2, u32> gather_component; + BitField<25, 2, u32> num_derivatives; +}; + +enum class ScalarReg : u32 { + S0, + S1, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + S12, + S13, + S14, + S15, + S16, + S17, + S18, + S19, + S20, + S21, + S22, + S23, + S24, + S25, + S26, + S27, + S28, + S29, + S30, + S31, + S32, + S33, + S34, + S35, + S36, + S37, + S38, + S39, + S40, + S41, + S42, + S43, + S44, + S45, + S46, + S47, + S48, + S49, + S50, + S51, + S52, + S53, + S54, + S55, + S56, + S57, + S58, + S59, + S60, + S61, + S62, + S63, + S64, + S65, + S66, + S67, + S68, + S69, + S70, + S71, + S72, + S73, + S74, + S75, + S76, + S77, + S78, + S79, + S80, + S81, + S82, + S83, + S84, + S85, + S86, + S87, + S88, + S89, + S90, + S91, + S92, + S93, + S94, + S95, + S96, + S97, + S98, + S99, + S100, + S101, + S102, + S103, + Max, +}; +static constexpr size_t NumScalarRegs = static_cast(ScalarReg::Max); + +enum class VectorReg : u32 { + V0, + V1, + V2, + V3, + V4, + V5, + V6, + V7, + V8, + V9, + V10, + V11, + V12, + V13, + V14, + V15, + V16, + V17, + V18, + V19, + V20, + V21, + V22, + V23, + V24, + V25, + V26, + V27, + V28, + V29, + V30, + V31, + V32, + V33, + V34, + V35, + V36, + V37, + V38, + V39, 
+ V40, + V41, + V42, + V43, + V44, + V45, + V46, + V47, + V48, + V49, + V50, + V51, + V52, + V53, + V54, + V55, + V56, + V57, + V58, + V59, + V60, + V61, + V62, + V63, + V64, + V65, + V66, + V67, + V68, + V69, + V70, + V71, + V72, + V73, + V74, + V75, + V76, + V77, + V78, + V79, + V80, + V81, + V82, + V83, + V84, + V85, + V86, + V87, + V88, + V89, + V90, + V91, + V92, + V93, + V94, + V95, + V96, + V97, + V98, + V99, + V100, + V101, + V102, + V103, + V104, + V105, + V106, + V107, + V108, + V109, + V110, + V111, + V112, + V113, + V114, + V115, + V116, + V117, + V118, + V119, + V120, + V121, + V122, + V123, + V124, + V125, + V126, + V127, + V128, + V129, + V130, + V131, + V132, + V133, + V134, + V135, + V136, + V137, + V138, + V139, + V140, + V141, + V142, + V143, + V144, + V145, + V146, + V147, + V148, + V149, + V150, + V151, + V152, + V153, + V154, + V155, + V156, + V157, + V158, + V159, + V160, + V161, + V162, + V163, + V164, + V165, + V166, + V167, + V168, + V169, + V170, + V171, + V172, + V173, + V174, + V175, + V176, + V177, + V178, + V179, + V180, + V181, + V182, + V183, + V184, + V185, + V186, + V187, + V188, + V189, + V190, + V191, + V192, + V193, + V194, + V195, + V196, + V197, + V198, + V199, + V200, + V201, + V202, + V203, + V204, + V205, + V206, + V207, + V208, + V209, + V210, + V211, + V212, + V213, + V214, + V215, + V216, + V217, + V218, + V219, + V220, + V221, + V222, + V223, + V224, + V225, + V226, + V227, + V228, + V229, + V230, + V231, + V232, + V233, + V234, + V235, + V236, + V237, + V238, + V239, + V240, + V241, + V242, + V243, + V244, + V245, + V246, + V247, + V248, + V249, + V250, + V251, + V252, + V253, + V254, + V255, + Max, +}; +static constexpr size_t NumVectorRegs = static_cast(VectorReg::Max); + +template +concept RegT = std::is_same_v || std::is_same_v; + +template +[[nodiscard]] constexpr Reg operator+(Reg reg, int num) { + const int result{static_cast(reg) + num}; + if (result >= static_cast(Reg::Max)) { + throw LogicError("Overflow on register arithmetic"); + } + if (result < 0) { + throw LogicError("Underflow on register arithmetic"); + } + return static_cast(result); +} + +template +[[nodiscard]] constexpr Reg operator-(Reg reg, int num) { + return reg + (-num); +} + +template +constexpr Reg operator++(Reg& reg) { + reg = reg + 1; + return reg; +} + +template +constexpr Reg operator++(Reg& reg, int) { + const Reg copy{reg}; + reg = reg + 1; + return copy; +} + +template +[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { + return static_cast(reg); +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(Shader::IR::ScalarReg reg, format_context& ctx) const { + return fmt::format_to(ctx.out(), "SGPR{}", static_cast(reg)); + } +}; +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(Shader::IR::VectorReg reg, format_context& ctx) const { + return fmt::format_to(ctx.out(), "VGPR{}", static_cast(reg)); + } +}; diff --git a/src/shader_recompiler/ir/type.cpp b/src/shader_recompiler/ir/type.cpp new file mode 100644 index 00000000..9d303b4d --- /dev/null +++ b/src/shader_recompiler/ir/type.cpp @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "shader_recompiler/ir/type.h" + +namespace Shader::IR { + +std::string NameOf(Type type) { + static constexpr std::array 
names{ + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", + "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3", + "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4", + }; + const size_t bits{static_cast(type)}; + if (bits == 0) { + return "Void"; + } + std::string result; + for (size_t i = 0; i < names.size(); i++) { + if ((bits & (size_t{1} << i)) != 0) { + if (!result.empty()) { + result += '|'; + } + result += names[i]; + } + } + return result; +} + +bool AreTypesCompatible(Type lhs, Type rhs) noexcept { + return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/type.h b/src/shader_recompiler/ir/type.h new file mode 100644 index 00000000..d7f47e1d --- /dev/null +++ b/src/shader_recompiler/ir/type.h @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include "common/enum.h" + +namespace Shader::IR { + +enum class Type { + Void = 0, + Opaque = 1 << 0, + ScalarReg = 1 << 1, + VectorReg = 1 << 2, + Attribute = 1 << 3, + SystemValue = 1 << 4, + U1 = 1 << 5, + U8 = 1 << 6, + U16 = 1 << 7, + U32 = 1 << 8, + U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, +}; +DECLARE_ENUM_FLAG_OPERATORS(Type) + +[[nodiscard]] std::string NameOf(Type type); + +[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept; + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(Shader::IR::Type type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(type)); + } +}; diff --git a/src/shader_recompiler/ir/value.cpp b/src/shader_recompiler/ir/value.cpp new file mode 100644 index 00000000..a455f8b1 --- /dev/null +++ b/src/shader_recompiler/ir/value.cpp @@ -0,0 +1,93 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/value.h" + +namespace Shader::IR { + +Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} + +Value::Value(IR::ScalarReg reg) noexcept : type{Type::ScalarReg}, sreg{reg} {} + +Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {} + +Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} + +Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} + +Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} + +Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} + +Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} + +Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} + +Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} + +Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} + +IR::Type Value::Type() const noexcept { + if (IsPhi()) { + // The type of a phi node is stored in its flags + return inst->Flags(); + } + if (IsIdentity()) { + return inst->Arg(0).Type(); + } + if (type == Type::Opaque) { + return inst->Type(); + } + return type; +} + +bool Value::operator==(const Value& other) 
const { + if (type != other.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Opaque: + return inst == other.inst; + case Type::ScalarReg: + return sreg == other.sreg; + case Type::VectorReg: + return vreg == other.vreg; + case Type::Attribute: + return attribute == other.attribute; + case Type::U1: + return imm_u1 == other.imm_u1; + case Type::U8: + return imm_u8 == other.imm_u8; + case Type::U16: + case Type::F16: + return imm_u16 == other.imm_u16; + case Type::U32: + case Type::F32: + return imm_u32 == other.imm_u32; + case Type::U64: + case Type::F64: + return imm_u64 == other.imm_u64; + case Type::U32x2: + case Type::U32x3: + case Type::U32x4: + case Type::F16x2: + case Type::F16x3: + case Type::F16x4: + case Type::F32x2: + case Type::F32x3: + case Type::F32x4: + case Type::F64x2: + case Type::F64x3: + case Type::F64x4: + break; + } + throw LogicError("Invalid type {}", type); +} + +bool Value::operator!=(const Value& other) const { + return !operator==(other); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h new file mode 100644 index 00000000..8c97f495 --- /dev/null +++ b/src/shader_recompiler/ir/value.h @@ -0,0 +1,353 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/type.h" + +namespace Shader::IR { + +class Block; +class Inst; + +struct AssociatedInsts; + +class Value { +public: + Value() noexcept = default; + explicit Value(IR::Inst* value) noexcept; + explicit Value(IR::ScalarReg reg) noexcept; + explicit Value(IR::VectorReg reg) noexcept; + explicit Value(IR::Attribute value) noexcept; + explicit Value(bool value) noexcept; + explicit Value(u8 value) noexcept; + explicit Value(u16 value) noexcept; + explicit Value(u32 value) noexcept; + explicit Value(f32 value) noexcept; + explicit Value(u64 value) noexcept; + explicit Value(f64 value) noexcept; + + [[nodiscard]] bool IsIdentity() const noexcept; + [[nodiscard]] bool IsPhi() const noexcept; + [[nodiscard]] bool IsEmpty() const noexcept; + [[nodiscard]] bool IsImmediate() const noexcept; + [[nodiscard]] IR::Type Type() const noexcept; + + [[nodiscard]] IR::Inst* Inst() const; + [[nodiscard]] IR::Inst* InstRecursive() const; + [[nodiscard]] IR::Inst* TryInstRecursive() const; + [[nodiscard]] IR::Value Resolve() const; + [[nodiscard]] IR::ScalarReg ScalarReg() const; + [[nodiscard]] IR::VectorReg VectorReg() const; + [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] bool U1() const; + [[nodiscard]] u8 U8() const; + [[nodiscard]] u16 U16() const; + [[nodiscard]] u32 U32() const; + [[nodiscard]] f32 F32() const; + [[nodiscard]] u64 U64() const; + [[nodiscard]] f64 F64() const; + + [[nodiscard]] bool operator==(const Value& other) const; + [[nodiscard]] bool operator!=(const Value& other) const; + +private: + IR::Type type{}; + union { + IR::Inst* inst{}; + IR::ScalarReg sreg; + IR::VectorReg vreg; + IR::Attribute attribute; + bool imm_u1; + u8 imm_u8; + u16 imm_u16; + u32 imm_u32; + f32 imm_f32; + u64 imm_u64; + f64 imm_f64; + }; +}; +static_assert(static_cast(IR::Type::Void) == 0, "memset relies on IR::Type being zero"); 
+static_assert(std::is_trivially_copyable_v); + +template +class TypedValue : public Value { +public: + TypedValue() = default; + + template + requires((other_type & type_) != IR::Type::Void) + explicit(false) TypedValue(const TypedValue& value) : Value(value) {} + + explicit TypedValue(const Value& value) : Value(value) { + if ((value.Type() & type_) == IR::Type::Void) { + throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); + } + } + + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} +}; + +class Inst : public boost::intrusive::list_base_hook<> { +public: + explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + explicit Inst(const Inst& base); + ~Inst(); + + Inst& operator=(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; + + /// Get the number of uses this instruction has. + [[nodiscard]] int UseCount() const noexcept { + return use_count; + } + + /// Determines whether this instruction has uses or not. + [[nodiscard]] bool HasUses() const noexcept { + return use_count > 0; + } + + /// Get the opcode this microinstruction represents. + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { + return op; + } + + /// Determines whether or not this instruction may have side effects. + [[nodiscard]] bool MayHaveSideEffects() const noexcept; + + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const; + + /// Get the type this instruction returns. + [[nodiscard]] IR::Type Type() const; + + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const { + return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } + + /// Get the value of a given argument index. + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == IR::Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + + /// Set the value of a given argument index. + void SetArg(size_t index, Value value); + + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + + void Invalidate(); + void ClearArgs(); + + void ReplaceUsesWith(Value replacement); + + void ReplaceOpcode(IR::Opcode opcode); + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); + return ret; + } + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); + } + + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = std::bit_cast(def); + } + + /// Return the intrusively stored host definition of this instruction. 
+ template + [[nodiscard]] DefinitionType Definition() const noexcept { + return std::bit_cast(definition); + } + +private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + + void Use(const Value& value); + void UndoUse(const Value& value); + + IR::Opcode op{}; + int use_count{}; + u32 flags{}; + u32 definition{}; + union { + NonTriviallyDummy dummy{}; + boost::container::small_vector, 2> phi_args; + std::array args; + }; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +using U1 = TypedValue; +using U8 = TypedValue; +using U16 = TypedValue; +using U32 = TypedValue; +using U64 = TypedValue; +using F16 = TypedValue; +using F32 = TypedValue; +using F64 = TypedValue; +using U32F32 = TypedValue; +using U32U64 = TypedValue; +using F32F64 = TypedValue; +using F16F32F64 = TypedValue; +using UAny = TypedValue; + +inline bool Value::IsIdentity() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; +} + +inline bool Value::IsPhi() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; +} + +inline bool Value::IsEmpty() const noexcept { + return type == Type::Void; +} + +inline bool Value::IsImmediate() const noexcept { + IR::Type current_type{type}; + const IR::Inst* current_inst{inst}; + while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) { + const Value& arg{current_inst->Arg(0)}; + current_type = arg.type; + current_inst = arg.inst; + } + return current_type != Type::Opaque; +} + +inline IR::Inst* Value::Inst() const { + DEBUG_ASSERT(type == Type::Opaque); + return inst; +} + +inline IR::Inst* Value::InstRecursive() const { + DEBUG_ASSERT(type == Type::Opaque); + if (IsIdentity()) { + return inst->Arg(0).InstRecursive(); + } + return inst; +} + +inline IR::Inst* Value::TryInstRecursive() const { + if (IsIdentity()) { + return inst->Arg(0).TryInstRecursive(); + } + return type == Type::Opaque ? 
inst : nullptr; +} + +inline IR::Value Value::Resolve() const { + if (IsIdentity()) { + return inst->Arg(0).Resolve(); + } + return *this; +} + +inline IR::ScalarReg Value::ScalarReg() const { + DEBUG_ASSERT(type == Type::ScalarReg); + return sreg; +} + +inline IR::VectorReg Value::VectorReg() const { + DEBUG_ASSERT(type == Type::VectorReg); + return vreg; +} + +inline IR::Attribute Value::Attribute() const { + DEBUG_ASSERT(type == Type::Attribute); + return attribute; +} + +inline bool Value::U1() const { + if (IsIdentity()) { + return inst->Arg(0).U1(); + } + DEBUG_ASSERT(type == Type::U1); + return imm_u1; +} + +inline u8 Value::U8() const { + if (IsIdentity()) { + return inst->Arg(0).U8(); + } + DEBUG_ASSERT(type == Type::U8); + return imm_u8; +} + +inline u16 Value::U16() const { + if (IsIdentity()) { + return inst->Arg(0).U16(); + } + DEBUG_ASSERT(type == Type::U16); + return imm_u16; +} + +inline u32 Value::U32() const { + if (IsIdentity()) { + return inst->Arg(0).U32(); + } + DEBUG_ASSERT(type == Type::U32); + return imm_u32; +} + +inline f32 Value::F32() const { + if (IsIdentity()) { + return inst->Arg(0).F32(); + } + DEBUG_ASSERT(type == Type::F32); + return imm_f32; +} + +inline u64 Value::U64() const { + if (IsIdentity()) { + return inst->Arg(0).U64(); + } + DEBUG_ASSERT(type == Type::U64); + return imm_u64; +} + +inline f64 Value::F64() const { + if (IsIdentity()) { + return inst->Arg(0).F64(); + } + DEBUG_ASSERT(type == Type::F64); + return imm_f64; +} + +[[nodiscard]] inline bool IsPhi(const Inst& inst) { + return inst.GetOpcode() == Opcode::Phi; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h new file mode 100644 index 00000000..1398898a --- /dev/null +++ b/src/shader_recompiler/object_pool.h @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include + +namespace Shader { + +template + requires std::is_destructible_v +class ObjectPool { +public: + explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { + node = &chunks.emplace_back(new_chunk_size); + } + + template + requires std::is_constructible_v + [[nodiscard]] T* Create(Args&&... 
args) { + return std::construct_at(Memory(), std::forward(args)...); + } + + void ReleaseContents() { + if (chunks.empty()) { + return; + } + Chunk& root{chunks.front()}; + if (root.used_objects == root.num_objects) { + // Root chunk has been filled, squash allocations into it + const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)}; + chunks.clear(); + chunks.emplace_back(total_objects); + } else { + root.Release(); + chunks.resize(1); + } + chunks.shrink_to_fit(); + node = &chunks.front(); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Storage { + Storage() noexcept {} + ~Storage() noexcept {} + + NonTrivialDummy dummy{}; + T object; + }; + + struct Chunk { + explicit Chunk() = default; + explicit Chunk(size_t size) + : num_objects{size}, storage{std::make_unique(size)} {} + + Chunk& operator=(Chunk&& rhs) noexcept { + Release(); + used_objects = std::exchange(rhs.used_objects, 0); + num_objects = std::exchange(rhs.num_objects, 0); + storage = std::move(rhs.storage); + return *this; + } + + Chunk(Chunk&& rhs) noexcept + : used_objects{std::exchange(rhs.used_objects, 0)}, + num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {} + + ~Chunk() { + Release(); + } + + void Release() { + std::destroy_n(storage.get(), used_objects); + used_objects = 0; + } + + size_t used_objects{}; + size_t num_objects{}; + std::unique_ptr storage; + }; + + [[nodiscard]] T* Memory() { + Chunk* const chunk{FreeChunk()}; + return &chunk->storage[chunk->used_objects++].object; + } + + [[nodiscard]] Chunk* FreeChunk() { + if (node->used_objects != node->num_objects) { + return node; + } + node = &chunks.emplace_back(new_chunk_size); + return node; + } + + Chunk* node{}; + std::vector chunks; + size_t new_chunk_size{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 00000000..f3c33c81 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Shader { + +struct Profile { + u32 supported_spirv{0x00010000}; + bool unified_descriptor_binding{}; + bool support_descriptor_aliasing{}; + bool support_int8{}; + bool support_int16{}; + bool support_int64{}; + bool support_vertex_instance_id{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; + bool support_fp16_signed_zero_nan_preserve{}; + bool support_fp32_signed_zero_nan_preserve{}; + bool support_fp64_signed_zero_nan_preserve{}; + bool support_explicit_workgroup_layout{}; + bool support_vote{}; + bool support_viewport_mask{}; + bool support_typeless_image_loads{}; + bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; + bool support_native_ndc{}; + bool support_scaled_attributes{}; + bool support_multi_viewport{}; + bool support_geometry_streams{}; + + bool warp_size_potentially_larger_than_guest{}; + + bool lower_left_origin_mode{}; + /// Fragment outputs have to be declared even if they are not written to avoid undefined values. + /// See Ori and the Blind Forest's main menu for reference. 
+ bool need_declared_frag_colors{}; + /// Prevents fast math optimizations that may cause inaccuracies + bool need_fastmath_off{}; + + /// OpFClamp is broken and OpFMax + OpFMin should be used instead + bool has_broken_spirv_clamp{}; + /// The Position builtin needs to be wrapped in a struct when used as an input + bool has_broken_spirv_position_input{}; + /// Offset image operands with an unsigned type do not work + bool has_broken_unsigned_image_offsets{}; + /// Signed instructions with unsigned data types are misinterpreted + bool has_broken_signed_operations{}; + /// Float controls break when fp16 is enabled + bool has_broken_fp16_float_controls{}; + /// Ignores SPIR-V ordered vs unordered using GLSL semantics + bool ignore_nan_fp_comparisons{}; + + /// Maxwell and earlier nVidia architectures have broken robust support + bool has_broken_robust{}; + + u64 min_ssbo_alignment{}; + + u32 max_user_clip_distances{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp new file mode 100644 index 00000000..5bc521bd --- /dev/null +++ b/src/shader_recompiler/recompiler.cpp @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/control_flow_graph.h" +#include "shader_recompiler/frontend/decode.h" +#include "shader_recompiler/frontend/structured_control_flow.h" +#include "shader_recompiler/ir/passes/passes.h" +#include "shader_recompiler/ir/post_order.h" +#include "shader_recompiler/recompiler.h" + +namespace Shader { + +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + u32 order_index{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } + return blocks; +} + +std::vector TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Stage stage, + std::span token) { + // Ensure first instruction is expected. + constexpr u32 token_mov_vcchi = 0xBEEB03FF; + ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); + + Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); + Gcn::GcnDecodeContext decoder; + + // Decode and save instructions + IR::Program program; + program.ins_list.reserve(token.size()); + while (!slice.atEnd()) { + program.ins_list.emplace_back(decoder.decodeInstruction(slice)); + } + + // Create control flow graph + ObjectPool gcn_block_pool{64}; + Gcn::CFG cfg{gcn_block_pool, program.ins_list}; + + // Structurize control flow graph and create program. 
+    program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage);
+    program.blocks = GenerateBlocks(program.syntax_list);
+    program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
+    program.stage = stage;
+
+    // Run optimization passes
+    Shader::Optimization::SsaRewritePass(program.post_order_blocks);
+    Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
+    Shader::Optimization::IdentityRemovalPass(program.blocks);
+    // Shader::Optimization::ResourceTrackingPass(program.post_order_blocks);
+    Shader::Optimization::DeadCodeEliminationPass(program.blocks);
+
+    // TODO: Pass profile from vulkan backend
+    const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program);
+    return code;
+}
+
+} // namespace Shader
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h
new file mode 100644
index 00000000..8cd9c7ea
--- /dev/null
+++ b/src/shader_recompiler/recompiler.h
@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "shader_recompiler/ir/program.h"
+
+namespace Shader {
+
+struct BinaryInfo {
+    u8 signature[7];
+    u8 version;
+    u32 pssl_or_cg : 1;
+    u32 cached : 1;
+    u32 type : 4;
+    u32 source_type : 2;
+    u32 length : 24;
+    u8 chunk_usage_base_offset_in_dw;
+    u8 num_input_usage_slots;
+    u8 is_srt : 1;
+    u8 is_srt_used_info_valid : 1;
+    u8 is_extended_usage_info : 1;
+    u8 reserved2 : 5;
+    u8 reserved3;
+    u64 shader_hash;
+    u32 crc32;
+};
+
+[[nodiscard]] std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
+                                                ObjectPool<IR::Block>& block_pool, Stage stage,
+                                                std::span<const u32> code);
+
+} // namespace Shader
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
new file mode 100644
index 00000000..052108b7
--- /dev/null
+++ b/src/shader_recompiler/runtime_info.h
@@ -0,0 +1,139 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <boost/container/small_vector.hpp>
+#include "shader_recompiler/ir/type.h"
+
+namespace Shader {
+
+enum class AttributeType : u8 {
+    Float,
+    SignedInt,
+    UnsignedInt,
+    SignedScaled,
+    UnsignedScaled,
+    Disabled,
+};
+
+enum class InputTopology {
+    Points,
+    Lines,
+    LinesAdjacency,
+    Triangles,
+    TrianglesAdjacency,
+};
+
+enum class CompareFunction {
+    Never,
+    Less,
+    Equal,
+    LessThanEqual,
+    Greater,
+    NotEqual,
+    GreaterThanEqual,
+    Always,
+};
+
+enum class Stage : u32 {
+    Vertex,
+    TessellationControl,
+    TessellationEval,
+    Geometry,
+    Fragment,
+    Compute,
+};
+constexpr u32 MaxStageTypes = 6;
+
+[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
+    return static_cast<Stage>(static_cast<size_t>(Stage::Vertex) + index);
+}
+
+enum class TextureType : u32 {
+    Color1D,
+    ColorArray1D,
+    Color2D,
+    ColorArray2D,
+    Color3D,
+    ColorCube,
+    Buffer,
+};
+constexpr u32 NUM_TEXTURE_TYPES = 7;
+
+enum class Interpolation {
+    Smooth,
+    Flat,
+    NoPerspective,
+};
+
+struct ConstantBufferDescriptor {
+    u32 index;
+    u32 count;
+
+    auto operator<=>(const ConstantBufferDescriptor&) const = default;
+};
+
+struct TextureDescriptor {
+    TextureType type;
+    bool is_eud;
+    bool is_depth;
+    bool is_multisample;
+    bool is_storage;
+    u32 count;
+    u32 eud_offset_dwords;
+    u32 ud_index_dwords;
+
+    auto operator<=>(const TextureDescriptor&) const = default;
+};
+using TextureDescriptors = boost::container::small_vector;
+
+struct Info {
+    bool uses_workgroup_id{};
+    bool uses_local_invocation_id{};
+    bool uses_invocation_id{};
+    bool uses_invocation_info{};
+    bool uses_sample_id{};
+
+    std::array<Interpolation, 32> interpolation{};
+    // VaryingState loads;
+    // VaryingState stores;
+    // VaryingState passthrough;
+
+    std::array<bool, 8> stores_frag_color{};
+    bool stores_sample_mask{};
+    bool stores_frag_depth{};
+
+    bool uses_fp16{};
+    bool uses_fp64{};
+    bool uses_fp16_denorms_flush{};
+    bool uses_fp16_denorms_preserve{};
+    bool uses_fp32_denorms_flush{};
+    bool uses_fp32_denorms_preserve{};
+    bool uses_int8{};
+    bool uses_int16{};
+    bool uses_int64{};
+    bool uses_image_1d{};
+    bool uses_sampled_1d{};
+    bool uses_subgroup_vote{};
+    bool uses_subgroup_mask{};
+    bool uses_derivatives{};
+
+    IR::Type used_constant_buffer_types{};
+    IR::Type used_storage_buffer_types{};
+    IR::Type used_indirect_cbuf_types{};
+
+    // std::array constant_buffer_used_sizes{};
+    u32 used_clip_distances{};
+
+    // boost::container::static_vector
+    //     constant_buffer_descriptors;
+    // boost::container::static_vector
+    //     storage_buffers_descriptors;
+    // TextureBufferDescriptors texture_buffer_descriptors;
+    // ImageBufferDescriptors image_buffer_descriptors;
+    // TextureDescriptors texture_descriptors;
+    // ImageDescriptors image_descriptors;
+};
+
+} // namespace Shader
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 5bbe6ee2..89c54831 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -6,6 +6,7 @@
 #include "common/thread.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/amdgpu/pm4_cmds.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
 
 namespace AmdGpu {
 
@@ -107,7 +108,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
             regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
             regs.num_indices = draw_index->index_count;
             regs.draw_initiator = draw_index->draw_initiator;
-            // rasterizer->DrawIndex();
+            rasterizer->DrawIndex();
             break;
         }
         case PM4ItOpcode::DrawIndexAuto: {
diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h
index b40c9ba5..78526569 100644
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@@ -15,6 +15,10 @@
 #include
 #include
 
+namespace Vulkan {
+class Rasterizer;
+}
+
 namespace AmdGpu {
 
 #define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
@@ -46,9 +50,10 @@ struct Liverpool {
         } settings;
         UserData user_data;
 
-        const u8* Address() const {
+        template <typename T>
+        const T* Address() const {
             const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
-            return reinterpret_cast<const u8*>(addr);
+            return reinterpret_cast<const T*>(addr);
         }
     };
 
@@ -631,10 +636,15 @@ public:
 
     void WaitGpuIdle();
 
+    void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
+        rasterizer = rasterizer_;
+    }
+
 private:
     void ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes);
     void Process(std::stop_token stoken);
 
+    Vulkan::Rasterizer* rasterizer{};
     std::jthread process_thread{};
     std::queue<std::span<const u32>> gfx_ring{};
     std::condition_variable_any cv_submit{};
diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
new file mode 100644
index 00000000..8f9a76a2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -0,0 +1,113 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/liverpool_to_vk.h"
+
+namespace Vulkan::LiverpoolToVK {
+
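+// Each helper below maps one Liverpool (GCN) register enum onto its closest
+// Vulkan equivalent; values without a mapping assert via UNREACHABLE() and
+// then return a safe default, so bad register state never reaches the driver.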
+vk::StencilOp StencilOp(Liverpool::StencilFunc op) { + switch (op) { + case Liverpool::StencilFunc::Keep: + return vk::StencilOp::eKeep; + case Liverpool::StencilFunc::Zero: + return vk::StencilOp::eZero; + case Liverpool::StencilFunc::AddClamp: + return vk::StencilOp::eIncrementAndClamp; + case Liverpool::StencilFunc::SubClamp: + return vk::StencilOp::eDecrementAndClamp; + case Liverpool::StencilFunc::Invert: + return vk::StencilOp::eInvert; + case Liverpool::StencilFunc::AddWrap: + return vk::StencilOp::eIncrementAndWrap; + case Liverpool::StencilFunc::SubWrap: + return vk::StencilOp::eDecrementAndWrap; + default: + UNREACHABLE(); + return vk::StencilOp::eKeep; + } +} + +vk::CompareOp CompareOp(Liverpool::CompareFunc func) { + switch (func) { + case Liverpool::CompareFunc::Always: + return vk::CompareOp::eAlways; + case Liverpool::CompareFunc::Equal: + return vk::CompareOp::eEqual; + case Liverpool::CompareFunc::GreaterEqual: + return vk::CompareOp::eGreaterOrEqual; + case Liverpool::CompareFunc::Greater: + return vk::CompareOp::eGreater; + case Liverpool::CompareFunc::LessEqual: + return vk::CompareOp::eLessOrEqual; + case Liverpool::CompareFunc::Less: + return vk::CompareOp::eLess; + case Liverpool::CompareFunc::NotEqual: + return vk::CompareOp::eNotEqual; + case Liverpool::CompareFunc::Never: + return vk::CompareOp::eNever; + default: + UNREACHABLE(); + return vk::CompareOp::eAlways; + } +} + +vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { + switch (type) { + case Liverpool::PrimitiveType::PointList: + return vk::PrimitiveTopology::ePointList; + case Liverpool::PrimitiveType::LineList: + return vk::PrimitiveTopology::eLineList; + case Liverpool::PrimitiveType::LineStrip: + return vk::PrimitiveTopology::eLineStrip; + case Liverpool::PrimitiveType::TriangleList: + return vk::PrimitiveTopology::eTriangleList; + case Liverpool::PrimitiveType::TriangleFan: + return vk::PrimitiveTopology::eTriangleFan; + case Liverpool::PrimitiveType::TriangleStrip: + return vk::PrimitiveTopology::eTriangleStrip; + case Liverpool::PrimitiveType::AdjLineList: + return vk::PrimitiveTopology::eLineListWithAdjacency; + case Liverpool::PrimitiveType::AdjLineStrip: + return vk::PrimitiveTopology::eLineStripWithAdjacency; + case Liverpool::PrimitiveType::AdjTriangleList: + return vk::PrimitiveTopology::eTriangleListWithAdjacency; + case Liverpool::PrimitiveType::AdjTriangleStrip: + return vk::PrimitiveTopology::eTriangleStripWithAdjacency; + default: + UNREACHABLE(); + return vk::PrimitiveTopology::eTriangleList; + } +} + +vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode) { + switch (mode) { + case Liverpool::PolygonMode::Point: + return vk::PolygonMode::ePoint; + case Liverpool::PolygonMode::Line: + return vk::PolygonMode::eLine; + case Liverpool::PolygonMode::Fill: + return vk::PolygonMode::eFill; + default: + UNREACHABLE(); + return vk::PolygonMode::eFill; + } +} + +vk::CullModeFlags CullMode(Liverpool::CullMode mode) { + switch (mode) { + case Liverpool::CullMode::None: + return vk::CullModeFlagBits::eNone; + case Liverpool::CullMode::Front: + return vk::CullModeFlagBits::eFront; + case Liverpool::CullMode::Back: + return vk::CullModeFlagBits::eBack; + case Liverpool::CullMode::FrontAndBack: + return vk::CullModeFlagBits::eFrontAndBack; + default: + UNREACHABLE(); + return vk::CullModeFlagBits::eNone; + } +} + +} // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h new file mode 100644 
index 00000000..97994bf8
--- /dev/null
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "video_core/amdgpu/liverpool.h"
+#include "video_core/renderer_vulkan/vk_common.h"
+
+namespace Vulkan::LiverpoolToVK {
+
+using Liverpool = AmdGpu::Liverpool;
+
+vk::StencilOp StencilOp(Liverpool::StencilFunc op);
+
+vk::CompareOp CompareOp(Liverpool::CompareFunc func);
+
+vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type);
+
+vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
+
+vk::CullModeFlags CullMode(Liverpool::CullMode mode);
+
+} // namespace Vulkan::LiverpoolToVK
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index d68eaa74..e952263e 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -7,6 +7,7 @@
 #include "core/libraries/system/systemservice.h"
 #include "sdl_window.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
 
 #include
 
@@ -60,9 +61,10 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for
     };
 }
 
-RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_)
+RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool)
    : window{window_}, instance{window, Config::getGpuId()}, scheduler{instance},
      swapchain{instance, window}, texture_cache{instance, scheduler} {
+    rasterizer = std::make_unique<Rasterizer>(instance, scheduler, texture_cache, liverpool);
     const u32 num_images = swapchain.GetImageCount();
     const vk::Device device = instance.GetDevice();
 
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 3c7d0d6b..a3627584 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -13,6 +13,10 @@ namespace Frontend {
 class WindowSDL;
 }
 
+namespace AmdGpu {
+struct Liverpool;
+}
+
 namespace Vulkan {
 
 struct Frame {
@@ -26,9 +30,11 @@ struct Frame {
     vk::CommandBuffer cmdbuf;
 };
 
+class Rasterizer;
+
 class RendererVulkan {
 public:
-    explicit RendererVulkan(Frontend::WindowSDL& window);
+    explicit RendererVulkan(Frontend::WindowSDL& window, AmdGpu::Liverpool* liverpool);
     ~RendererVulkan();
 
     Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
@@ -47,6 +53,7 @@ private:
     Instance instance;
     Scheduler scheduler;
     Swapchain swapchain;
+    std::unique_ptr<Rasterizer> rasterizer;
     VideoCore::TextureCache texture_cache;
     vk::UniqueCommandPool command_pool;
     std::vector<Frame> present_frames;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
new file mode 100644
index 00000000..6cbd26b9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -0,0 +1,162 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <boost/container/static_vector.hpp>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+
+namespace Vulkan {
+
+GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_,
+                                   vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_,
+                                   std::array<vk::ShaderModule, MaxShaderStages> modules)
+    : instance{instance_},
+      pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} {
+    const vk::Device device = instance.GetDevice();
+
+    const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
+        .vertexBindingDescriptionCount = 0U,
+        .pVertexBindingDescriptions = nullptr,
+        .vertexAttributeDescriptionCount = 0U,
+        .pVertexAttributeDescriptions = nullptr,
+    };
+
+    const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
+        .topology = LiverpoolToVK::PrimitiveType(key.prim_type),
+        .primitiveRestartEnable = false,
+    };
+
+    const vk::PipelineRasterizationStateCreateInfo raster_state = {
+        .depthClampEnable = false,
+        .rasterizerDiscardEnable = false,
+        .polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
+        .cullMode = LiverpoolToVK::CullMode(key.cull_mode),
+        .frontFace = vk::FrontFace::eClockwise,
+        .depthBiasEnable = false,
+        .lineWidth = 1.0f,
+    };
+
+    const vk::PipelineMultisampleStateCreateInfo multisampling = {
+        .rasterizationSamples = vk::SampleCountFlagBits::e1,
+        .sampleShadingEnable = false,
+    };
+
+    const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
+        .blendEnable = false,
+        .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
+                          vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
+    };
+
+    const vk::PipelineColorBlendStateCreateInfo color_blending = {
+        .logicOpEnable = false,
+        .logicOp = vk::LogicOp::eCopy,
+        .attachmentCount = 1,
+        .pAttachments = &colorblend_attachment,
+        .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
+    };
+
+    const vk::Viewport viewport = {
+        .x = 0.0f,
+        .y = 0.0f,
+        .width = 1.0f,
+        .height = 1.0f,
+        .minDepth = 0.0f,
+        .maxDepth = 1.0f,
+    };
+
+    const vk::Rect2D scissor = {
+        .offset = {0, 0},
+        .extent = {1, 1},
+    };
+
+    const vk::PipelineViewportStateCreateInfo viewport_info = {
+        .viewportCount = 1,
+        .pViewports = &viewport,
+        .scissorCount = 1,
+        .pScissors = &scissor,
+    };
+
+    boost::container::static_vector<vk::DynamicState, 2> dynamic_states = {
+        vk::DynamicState::eViewport,
+        vk::DynamicState::eScissor,
+    };
+
+    const vk::PipelineDynamicStateCreateInfo dynamic_info = {
+        .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+        .pDynamicStates = dynamic_states.data(),
+    };
+
+    const vk::PipelineDepthStencilStateCreateInfo depth_info = {
+        .depthTestEnable = key.depth.depth_enable,
+        .depthWriteEnable = key.depth.depth_write_enable,
+        .depthCompareOp = LiverpoolToVK::CompareOp(key.depth.depth_func),
+        .depthBoundsTestEnable = key.depth.depth_bounds_enable,
+        .stencilTestEnable = key.depth.stencil_enable,
+        .front{
+            .failOp = LiverpoolToVK::StencilOp(key.stencil.stencil_fail_front),
+            .passOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zpass_front),
+            .depthFailOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zfail_front),
+            .compareOp = LiverpoolToVK::CompareOp(key.depth.stencil_ref_func),
+            .compareMask = key.stencil_ref_front.stencil_mask,
+            .writeMask = key.stencil_ref_front.stencil_write_mask,
+            .reference = key.stencil_ref_front.stencil_test_val,
+        },
+        .back{
+            .failOp = LiverpoolToVK::StencilOp(key.stencil.stencil_fail_back),
+            .passOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zpass_back),
+            .depthFailOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zfail_back),
+            .compareOp = LiverpoolToVK::CompareOp(key.depth.stencil_bf_func),
+            .compareMask = key.stencil_ref_back.stencil_mask,
+            .writeMask = key.stencil_ref_back.stencil_write_mask,
+            .reference = key.stencil_ref_back.stencil_test_val,
+        },
+    };
+
+    u32 shader_count = 2;
+    std::array<vk::PipelineShaderStageCreateInfo, MaxShaderStages> shader_stages;
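+    // The modules array is indexed by stage slot (vertex = 0, fragment = 4 of
+    // MaxShaderStages), while shader_stages is packed densely for the create
+    // info; shader_count above tracks how many entries are actually filled.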
+    shader_stages[0] = vk::PipelineShaderStageCreateInfo{
+        .stage = vk::ShaderStageFlagBits::eVertex,
+        .module = modules[0],
+        .pName = "main",
+    };
+    shader_stages[1] = vk::PipelineShaderStageCreateInfo{
+        .stage = vk::ShaderStageFlagBits::eFragment,
+        .module = modules[4],
+        .pName = "main",
+    };
+
+    const vk::Format color_format = vk::Format::eB8G8R8A8Srgb;
+    const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
+        .colorAttachmentCount = 1,
+        .pColorAttachmentFormats = &color_format,
+        .depthAttachmentFormat = vk::Format::eUndefined,
+        .stencilAttachmentFormat = vk::Format::eUndefined,
+    };
+
+    const vk::GraphicsPipelineCreateInfo pipeline_info = {
+        .pNext = &pipeline_rendering_ci,
+        .stageCount = shader_count,
+        .pStages = shader_stages.data(),
+        .pVertexInputState = &vertex_input_info,
+        .pInputAssemblyState = &input_assembly,
+        .pViewportState = &viewport_info,
+        .pRasterizationState = &raster_state,
+        .pMultisampleState = &multisampling,
+        .pDepthStencilState = &depth_info,
+        .pColorBlendState = &color_blending,
+        .pDynamicState = &dynamic_info,
+        .layout = pipeline_layout,
+    };
+
+    auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
+    if (result.result == vk::Result::eSuccess) {
+        pipeline = std::move(result.value);
+    } else {
+        UNREACHABLE_MSG("Graphics pipeline creation failed!");
+    }
+}
+
+GraphicsPipeline::~GraphicsPipeline() = default;
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
new file mode 100644
index 00000000..d8b7887b
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/types.h"
+#include "video_core/renderer_vulkan/liverpool_to_vk.h"
+#include "video_core/renderer_vulkan/vk_common.h"
+
+namespace Vulkan {
+
+static constexpr u32 MaxShaderStages = 5;
+
+class Instance;
+
+using Liverpool = AmdGpu::Liverpool;
+
+struct PipelineKey {
+    Liverpool::DepthControl depth;
+    Liverpool::StencilControl stencil;
+    Liverpool::StencilRefMask stencil_ref_front;
+    Liverpool::StencilRefMask stencil_ref_back;
+    Liverpool::PrimitiveType prim_type;
+    Liverpool::PolygonMode polygon_mode;
+    Liverpool::CullMode cull_mode;
+};
+static_assert(std::has_unique_object_representations_v<PipelineKey>);
+
+class GraphicsPipeline {
+public:
+    explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key,
+                              vk::PipelineCache pipeline_cache, vk::PipelineLayout layout,
+                              std::array<vk::ShaderModule, MaxShaderStages> modules);
+    ~GraphicsPipeline();
+
+    [[nodiscard]] vk::Pipeline Handle() const noexcept {
+        return *pipeline;
+    }
+
+private:
+    const Instance& instance;
+    vk::UniquePipeline pipeline;
+    vk::PipelineLayout pipeline_layout;
+    vk::PipelineCache pipeline_cache;
+    PipelineKey key;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 365fcb5d..0cde3e6e 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -196,9 +196,15 @@ bool Instance::CreateDevice() {
             .shaderClipDistance = features.shaderClipDistance,
         },
+        vk::PhysicalDeviceVulkan11Features{
+            .shaderDrawParameters = true,
+        },
         vk::PhysicalDeviceVulkan12Features{
             .timelineSemaphore = true,
         },
+        vk::PhysicalDeviceVulkan13Features{
+            .dynamicRendering = true,
+        },
         vk::PhysicalDeviceCustomBorderColorFeaturesEXT{
+            .customBorderColors = true,
+            .customBorderColorWithoutFormat = true,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
new file mode 100644
index 00000000..28fb51d0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -0,0 +1,70 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/scope_exit.h"
+#include "shader_recompiler/recompiler.h"
+#include "shader_recompiler/runtime_info.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+
+namespace Vulkan {
+
+PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
+                             AmdGpu::Liverpool* liverpool_)
+    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096},
+      block_pool{512} {
+    const vk::PipelineLayoutCreateInfo layout_info = {
+        .setLayoutCount = 0U,
+        .pSetLayouts = nullptr,
+        .pushConstantRangeCount = 0,
+        .pPushConstantRanges = nullptr,
+    };
+    pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
+    pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
+}
+
+void PipelineCache::BindPipeline() {
+    SCOPE_EXIT {
+        const auto cmdbuf = scheduler.CommandBuffer();
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
+    };
+
+    if (pipeline) {
+        return;
+    }
+
+    const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) {
+        const u32* token = pgm.Address<u32>();
+
+        // Retrieve shader header.
+        Shader::BinaryInfo bininfo;
+        std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo));
+
+        // Look up whether the shader already exists.
+        const auto it = module_map.find(bininfo.shader_hash);
+        if (it != module_map.end()) {
+            return *it->second;
+        }
+
+        // Compile and cache shader.
+        const auto data = std::span{token, bininfo.length / sizeof(u32)};
+        const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data);
+        return CompileSPV(program, instance.GetDevice());
+    };
+
+    // Retrieve shader stage modules.
+    // TODO: Only do this when program address is changed.
+    stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex);
+    stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment);
+
+    // Bind pipeline.
+    // TODO: Read entire key based on reg state.
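+    // The pipeline key is a snapshot of the register state the pipeline depends
+    // on; matching keys could later let pipelines be reused instead of being
+    // recreated. Only the primitive type and polygon mode are captured so far.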
+    graphics_key.prim_type = liverpool->regs.primitive_type;
+    graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode();
+    pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache,
+                                                  *pipeline_layout, stages);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
new file mode 100644
index 00000000..7634f9cb
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <tsl/robin_map.h>
+#include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/object_pool.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+
+namespace Vulkan {
+
+class Instance;
+class Scheduler;
+
+class PipelineCache {
+    static constexpr size_t MaxShaderStages = 5;
+
+public:
+    explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
+                           AmdGpu::Liverpool* liverpool);
+    ~PipelineCache() = default;
+
+    void BindPipeline();
+
+private:
+    const Instance& instance;
+    Scheduler& scheduler;
+    AmdGpu::Liverpool* liverpool;
+    vk::UniquePipelineCache pipeline_cache;
+    vk::UniquePipelineLayout pipeline_layout;
+    tsl::robin_map<u64, vk::UniqueShaderModule> module_map;
+    std::array<vk::ShaderModule, MaxShaderStages> stages{};
+    std::unique_ptr<GraphicsPipeline> pipeline;
+    PipelineKey graphics_key{};
+    Shader::ObjectPool<Shader::IR::Inst> inst_pool;
+    Shader::ObjectPool<Shader::IR::Block> block_pool;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file mode 100644
index 00000000..f9c8a9a8
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,99 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "video_core/amdgpu/liverpool.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/texture_cache/image_view.h"
+#include "video_core/texture_cache/texture_cache.h"
+
+namespace Vulkan {
+
+static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits::eVertexBuffer |
+                                                         vk::BufferUsageFlagBits::eIndexBuffer |
+                                                         vk::BufferUsageFlagBits::eTransferDst;
+
+Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
+                       VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
+    : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
+      liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool},
+      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
+    liverpool->BindRasterizer(this);
+}
+
+Rasterizer::~Rasterizer() = default;
+
+void Rasterizer::DrawIndex() {
+    const auto cmdbuf = scheduler.CommandBuffer();
+    auto& regs = liverpool->regs;
+
+    static bool first_time = true;
+    if (first_time) {
+        first_time = false;
+        return;
+    }
+
+    UpdateDynamicState();
+
+    pipeline_cache.BindPipeline();
+
+    const u32 pitch = regs.color_buffers[0].Pitch();
+    const u32 height = regs.color_buffers[0].Height();
+    const u32 tile_max = regs.color_buffers[0].slice.tile_max;
+    auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch);
+
+    const vk::RenderingAttachmentInfo color_info = {
+        .imageView = *image_view.image_view,
+        .imageLayout = vk::ImageLayout::eGeneral,
+        .loadOp = vk::AttachmentLoadOp::eLoad,
+        .storeOp =
vk::AttachmentStoreOp::eStore, + }; + + // TODO: Don't restart renderpass every draw + const vk::RenderingInfo rendering_info = { + .renderArea = {.offset = {0, 0}, .extent = {1920, 1080}}, + .layerCount = 1, + .colorAttachmentCount = 1, + .pColorAttachments = &color_info, + }; + + cmdbuf.beginRendering(rendering_info); + cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); + cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.endRendering(); +} + +void Rasterizer::UpdateDynamicState() { + UpdateViewportScissorState(); +} + +void Rasterizer::UpdateViewportScissorState() { + auto& regs = liverpool->regs; + + const auto cmdbuf = scheduler.CommandBuffer(); + const vk::Viewport viewport{ + .x = regs.viewports[0].xoffset - regs.viewports[0].xscale, + .y = regs.viewports[0].yoffset - regs.viewports[0].yscale, + .width = regs.viewports[0].xscale * 2.0f, + .height = regs.viewports[0].yscale * 2.0f, + .minDepth = regs.viewports[0].zoffset - regs.viewports[0].zscale, + .maxDepth = regs.viewports[0].zscale + regs.viewports[0].zoffset, + }; + const vk::Rect2D scissor{ + .offset = {regs.screen_scissor.top_left_x, regs.screen_scissor.top_left_y}, + .extent = {regs.screen_scissor.GetWidth(), regs.screen_scissor.GetHeight()}, + }; + cmdbuf.setViewport(0, viewport); + cmdbuf.setScissor(0, scissor); +} + +void Rasterizer::UpdateDepthStencilState() { + auto& depth = liverpool->regs.depth_control; + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h new file mode 100644 index 00000000..ba3c2d3a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -0,0 +1,51 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace AmdGpu { +struct Liverpool; +} + +namespace VideoCore { +class TextureCache; +} + +namespace Vulkan { + +class Scheduler; +class GraphicsPipeline; + +class Rasterizer { +public: + explicit Rasterizer(const Instance& instance, Scheduler& scheduler, + VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); + ~Rasterizer(); + + /// Performs a draw call with an index buffer. + void DrawIndex(); + + /// Updates graphics state that is not part of the bound pipeline. + void UpdateDynamicState(); + +private: + /// Updates viewport and scissor from liverpool registers. + void UpdateViewportScissorState(); + + /// Updates depth and stencil pipeline state from liverpool registers. 
+    void UpdateDepthStencilState();
+
+private:
+    const Instance& instance;
+    Scheduler& scheduler;
+    VideoCore::TextureCache& texture_cache;
+    AmdGpu::Liverpool* liverpool;
+    PipelineCache pipeline_cache;
+    StreamBuffer vertex_index_buffer;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index ea0336cc..b78d2563 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -38,7 +38,7 @@ using Libraries::VideoOut::TilingMode;
     if (false /*&& IsDepthStencilFormat(format)*/) {
         usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
     } else {
-        // usage |= vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eStorage;
+        usage |= vk::ImageUsageFlagBits::eColorAttachment;
     }
     return usage;
 }
diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h
index 32c9df6e..c1bddec7 100644
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@@ -7,6 +7,7 @@
 #include "common/types.h"
 #include "core/libraries/videoout/buffer.h"
 #include "video_core/renderer_vulkan/vk_common.h"
+#include "video_core/texture_cache/image_view.h"
 #include "video_core/texture_cache/types.h"
 
 namespace Vulkan {
@@ -41,23 +42,6 @@ struct ImageInfo {
     u32 guest_size_bytes = 0;
 };
 
-struct Handle {
-    VmaAllocation allocation;
-    VkImage image;
-
-    Handle() = default;
-
-    Handle(Handle&& other)
-        : image{std::exchange(other.image, VK_NULL_HANDLE)},
-          allocation{std::exchange(other.allocation, VK_NULL_HANDLE)} {}
-
-    Handle& operator=(Handle&& other) {
-        image = std::exchange(other.image, VK_NULL_HANDLE);
-        allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
-        return *this;
-    }
-};
-
 struct UniqueImage {
     explicit UniqueImage(vk::Device device, VmaAllocator allocator);
     ~UniqueImage();
@@ -100,6 +84,14 @@ struct Image {
         return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
     }
 
+    ImageViewId FindView(const ImageViewInfo& info) const {
+        const auto it = std::ranges::find(image_view_infos, info);
+        if (it == image_view_infos.end()) {
+            return {};
+        }
+        return image_view_ids[std::distance(image_view_infos.begin(), it)];
+    }
+
     void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask);
 
     const Vulkan::Instance* instance;
@@ -110,6 +102,8 @@ struct Image {
     ImageFlagBits flags = ImageFlagBits::CpuModified;
     VAddr cpu_addr = 0;
     VAddr cpu_addr_end = 0;
+    std::vector<ImageViewInfo> image_view_infos;
+    std::vector<ImageViewId> image_view_ids;
 
     // Resource state tracking
     vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h
index 6b3c7f5a..d84a963e 100644
--- a/src/video_core/texture_cache/image_view.h
+++ b/src/video_core/texture_cache/image_view.h
@@ -13,20 +13,9 @@ class Scheduler;
 
 namespace VideoCore {
 
-enum class ImageViewType : u32 {
-    e1D,
-    e2D,
-    Cube,
-    e3D,
-    e1DArray,
-    e2DArray,
-    CubeArray,
-    Buffer,
-};
-
 struct ImageViewInfo {
-    vk::ImageViewType type{};
-    vk::Format format{};
+    vk::ImageViewType type = vk::ImageViewType::e2D;
+    vk::Format format = vk::Format::eR8G8B8A8Unorm;
     SubresourceRange range;
     vk::ComponentMapping mapping{};
 
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 6b2e7553..7f232920 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -31,10 +31,6 @@ class SlotVector {
     constexpr static std::size_t InitialCapacity = 1024;
 
 public:
-
SlotVector() { - Reserve(InitialCapacity); - } - ~SlotVector() noexcept { std::size_t index = 0; for (u64 bits : stored_bitset) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 71abd9c2..15679ba9 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -83,6 +83,15 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& ASSERT_MSG(veh_handle, "Failed to register an exception handler"); #endif g_texture_cache = this; + + ImageInfo info; + info.pixel_format = vk::Format::eR8G8B8A8Unorm; + info.type = vk::ImageType::e2D; + const ImageId null_id = slot_images.insert(instance, scheduler, info, 0); + ASSERT(null_id.index == 0); + + ImageViewInfo view_info; + void(slot_image_views.insert(instance, scheduler, view_info, slot_images[null_id].image)); } TextureCache::~TextureCache() { @@ -128,6 +137,29 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { return image; } +ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) { + boost::container::small_vector image_ids; + ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) { + if (image.cpu_addr == cpu_address) { + image_ids.push_back(image_id); + } + }); + + ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!"); + auto* image = &slot_images[image_ids.empty() ? ImageId{0} : image_ids.back()]; + + ImageViewInfo info; + info.format = vk::Format::eB8G8R8A8Srgb; + if (const ImageViewId view_id = image->FindView(info); view_id) { + return slot_image_views[view_id]; + } + + const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image); + image->image_view_infos.emplace_back(info); + image->image_view_ids.emplace_back(view_id); + return slot_image_views[view_id]; +} + void TextureCache::RefreshImage(Image& image) { // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ac4df300..a11201c4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -9,6 +9,7 @@ #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/texture_cache/image.h" +#include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/slot_vector.h" namespace Core::Libraries::VideoOut { @@ -35,6 +36,9 @@ public: /// Retrieves the image handle of the image with the provided attributes and address. Image& FindImage(const ImageInfo& info, VAddr cpu_address); + /// Retrieves the render target with specified properties + ImageView& RenderTarget(VAddr cpu_address, u32 pitch); + /// Reuploads image contents. 
     void RefreshImage(Image& image);
@@ -116,6 +120,7 @@ private:
     Vulkan::Scheduler& scheduler;
     Vulkan::StreamBuffer staging;
     SlotVector<Image> slot_images;
+    SlotVector<ImageView> slot_image_views;
     tsl::robin_pg_map> page_table;
     boost::icl::interval_map cached_pages;
 #ifdef _WIN64
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 70aaddc5..4ba856c0 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -47,16 +47,22 @@ struct SubresourceLayers {
 struct SubresourceBase {
     s32 level = 0;
     s32 layer = 0;
+
+    auto operator<=>(const SubresourceBase&) const = default;
 };
 
 struct SubresourceExtent {
     s32 levels = 1;
     s32 layers = 1;
+
+    auto operator<=>(const SubresourceExtent&) const = default;
 };
 
 struct SubresourceRange {
     SubresourceBase base;
     SubresourceExtent extent;
+
+    auto operator<=>(const SubresourceRange&) const = default;
 };
 
 struct ImageCopy {
diff --git a/src/vulkan_util.cpp b/src/vulkan_util.cpp
deleted file mode 100644
index bc4bb734..00000000
--- a/src/vulkan_util.cpp
+++ /dev/null
@@ -1,626 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include
-#include
-#include
-#include
-#include
-#include "common/assert.h"
-#include "common/debug.h"
-#include "common/logging/log.h"
-#include "common/singleton.h"
-#include "vulkan_util.h"
-
-#include
-
-void Graphics::Vulkan::vulkanCreate(Emu::WindowCtx* ctx) {
-    /*Emu::VulkanExt ext;
-    vulkanGetInstanceExtensions(&ext);
-
-    VkApplicationInfo app_info{};
-    app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
-    app_info.pNext = nullptr;
-    app_info.pApplicationName = "shadps4";
-    app_info.applicationVersion = 1;
-    app_info.pEngineName = "shadps4";
-    app_info.engineVersion = 1;
-    app_info.apiVersion = VK_API_VERSION_1_2;
-
-    VkInstanceCreateInfo inst_info{};
-    inst_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-    inst_info.pNext = nullptr;
-    inst_info.flags = 0;
-    inst_info.pApplicationInfo = &app_info;
-    inst_info.enabledExtensionCount = ext.required_extensions_count;
-    inst_info.ppEnabledExtensionNames = ext.required_extensions;
-    inst_info.enabledLayerCount = 0;
-    inst_info.ppEnabledLayerNames = nullptr;
-
-    const VkResult result = vkCreateInstance(&inst_info, nullptr, &ctx->m_graphic_ctx.m_instance);
-    ASSERT_MSG(result == VK_SUCCESS, "Can't create an vulkan instance");
-
-    if (SDL_Vulkan_CreateSurface(ctx->m_window, ctx->m_graphic_ctx.m_instance, NULL,
-                                 &ctx->m_surface) == SDL_FALSE) {
-        UNREACHABLE_MSG("Can't create an vulkan surface");
-    }
-
-    // TODO i am not sure if it's that it is neccesary or if it needs more
-    std::vector device_extensions = {
-        VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
-        VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME, "VK_KHR_maintenance1"};
-
-    Emu::VulkanQueues queues;
-
-    vulkanFindCompatiblePhysicalDevice(ctx->m_graphic_ctx.m_instance, ctx->m_surface,
-                                       device_extensions, &ctx->m_surface_capabilities,
-                                       &ctx->m_graphic_ctx.m_physical_device, &queues);
-
-    ASSERT_MSG(ctx->m_graphic_ctx.m_physical_device, "Can't find compatible vulkan device");
-
-    VkPhysicalDeviceProperties device_properties{};
-    vkGetPhysicalDeviceProperties(ctx->m_graphic_ctx.m_physical_device, &device_properties);
-
-    LOG_INFO(Render_Vulkan, "GFX device to be used : {}", device_properties.deviceName);
-
-    ctx->m_graphic_ctx.m_device = vulkanCreateDevice(
-        ctx->m_graphic_ctx.m_physical_device, ctx->m_surface, &ext, queues, device_extensions);
-
ASSERT_MSG(ctx->m_graphic_ctx.m_device, "Can't create vulkan device"); - - vulkanCreateQueues(&ctx->m_graphic_ctx, queues); - ctx->swapchain = vulkanCreateSwapchain(&ctx->m_graphic_ctx, 2);*/ -} - -Emu::VulkanSwapchain Graphics::Vulkan::vulkanCreateSwapchain(HLE::Libs::Graphics::GraphicCtx* ctx, - u32 image_count) { - return {}; - /*auto window_ctx = Common::Singleton::Instance(); - const auto& capabilities = window_ctx->m_surface_capabilities.capabilities; - Emu::VulkanSwapchain s{}; - - VkExtent2D extent{}; - extent.width = std::clamp(ctx->screen_width, capabilities.minImageExtent.width, - capabilities.maxImageExtent.width); - extent.height = std::clamp(ctx->screen_height, capabilities.minImageExtent.height, - capabilities.maxImageExtent.height); - image_count = std::clamp(image_count, capabilities.minImageCount, capabilities.maxImageCount); - - VkSwapchainCreateInfoKHR create_info{}; - create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.surface = window_ctx->m_surface; - create_info.minImageCount = image_count; - - if (window_ctx->m_surface_capabilities.is_format_unorm_bgra32) { - create_info.imageFormat = VK_FORMAT_B8G8R8A8_UNORM; - create_info.imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; - } else if (window_ctx->m_surface_capabilities.is_format_srgb_bgra32) { - create_info.imageFormat = VK_FORMAT_B8G8R8A8_SRGB; - create_info.imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; - } else { - create_info.imageFormat = window_ctx->m_surface_capabilities.formats.at(0).format; - create_info.imageColorSpace = window_ctx->m_surface_capabilities.formats.at(0).colorSpace; - } - - create_info.imageExtent = extent; - create_info.imageArrayLayers = 1; - create_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - create_info.queueFamilyIndexCount = 0; - create_info.pQueueFamilyIndices = nullptr; - create_info.preTransform = window_ctx->m_surface_capabilities.capabilities.currentTransform; - create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - create_info.presentMode = VK_PRESENT_MODE_FIFO_KHR; - create_info.clipped = VK_TRUE; - create_info.oldSwapchain = nullptr; - - s.swapchain_format = create_info.imageFormat; - s.swapchain_extent = extent; - - vkCreateSwapchainKHR(ctx->m_device, &create_info, nullptr, &s.swapchain); - - vkGetSwapchainImagesKHR(ctx->m_device, s.swapchain, &s.swapchain_images_count, nullptr); - - s.swapchain_images.resize(s.swapchain_images_count); - vkGetSwapchainImagesKHR(ctx->m_device, s.swapchain, &s.swapchain_images_count, - s.swapchain_images.data()); - - s.swapchain_image_views.resize(s.swapchain_images_count); - for (uint32_t i = 0; i < s.swapchain_images_count; i++) { - VkImageViewCreateInfo create_info{}; - create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.image = s.swapchain_images[i]; - create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - create_info.format = s.swapchain_format; - create_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; - create_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; - create_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; - create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; - create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - create_info.subresourceRange.baseArrayLayer = 0; - create_info.subresourceRange.baseMipLevel = 0; - 
create_info.subresourceRange.layerCount = 1; - create_info.subresourceRange.levelCount = 1; - - vkCreateImageView(ctx->m_device, &create_info, nullptr, &s.swapchain_image_views[i]); - } - - ASSERT_MSG(s.swapchain, "Could not create swapchain"); - s.current_index = static_cast(-1); - - VkSemaphoreCreateInfo present_complete_info{}; - present_complete_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - present_complete_info.pNext = nullptr; - present_complete_info.flags = 0; - - auto result = vkCreateSemaphore(ctx->m_device, &present_complete_info, nullptr, - &s.present_complete_semaphore); - - VkFenceCreateInfo fence_info{}; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_info.pNext = nullptr; - fence_info.flags = 0; - - result = vkCreateFence(ctx->m_device, &fence_info, nullptr, &s.present_complete_fence); - ASSERT_MSG(result == VK_SUCCESS, "Can't create vulkan fence"); - - return s;*/ -} - -void Graphics::Vulkan::vulkanCreateQueues(HLE::Libs::Graphics::GraphicCtx* ctx, - const Emu::VulkanQueues& queues) { - auto get_queue = [ctx](int id, const Emu::VulkanQueueInfo& info, bool with_mutex = false) { - ctx->queues[id].family = info.family; - ctx->queues[id].index = info.index; - vkGetDeviceQueue(ctx->m_device, ctx->queues[id].family, ctx->queues[id].index, - &ctx->queues[id].vk_queue); - if (with_mutex) { - ctx->queues[id].mutex = std::make_unique(); - } - }; - - get_queue(VULKAN_QUEUE_GFX, queues.graphics.at(0)); - get_queue(VULKAN_QUEUE_UTIL, queues.transfer.at(0)); - get_queue(VULKAN_QUEUE_PRESENT, queues.present.at(0)); - - for (int id = 0; id < VULKAN_QUEUE_COMPUTE_NUM; id++) { - bool with_mutex = (VULKAN_QUEUE_COMPUTE_NUM == queues.compute.size()); - get_queue(id, queues.compute.at(id % queues.compute.size()), with_mutex); - } -} - -VkDevice Graphics::Vulkan::vulkanCreateDevice(VkPhysicalDevice physical_device, - VkSurfaceKHR surface, const Emu::VulkanExt* r, - const Emu::VulkanQueues& queues, - const std::vector& device_extensions) { - /*std::vector queue_create_info(queues.family_count); - std::vector> queue_priority(queues.family_count); - uint32_t queue_create_info_num = 0; - - for (uint32_t i = 0; i < queues.family_count; i++) { - if (queues.family_used[i] != 0) { - for (uint32_t pi = 0; pi < queues.family_used[i]; pi++) { - queue_priority[queue_create_info_num].push_back(1.0f); - } - - queue_create_info[queue_create_info_num].sType = - VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queue_create_info[queue_create_info_num].pNext = nullptr; - queue_create_info[queue_create_info_num].flags = 0; - queue_create_info[queue_create_info_num].queueFamilyIndex = i; - queue_create_info[queue_create_info_num].queueCount = queues.family_used[i]; - queue_create_info[queue_create_info_num].pQueuePriorities = - queue_priority[queue_create_info_num].data(); - - queue_create_info_num++; - } - } - - VkPhysicalDeviceFeatures device_features{}; - // TODO add neccesary device features - - VkDeviceCreateInfo create_info{}; - create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.pQueueCreateInfos = queue_create_info.data(); - create_info.queueCreateInfoCount = queue_create_info_num; - create_info.enabledLayerCount = 0; - create_info.ppEnabledLayerNames = nullptr; - create_info.enabledExtensionCount = device_extensions.size(); - create_info.ppEnabledExtensionNames = device_extensions.data(); - create_info.pEnabledFeatures = &device_features; - - VkDevice device = nullptr; - - vkCreateDevice(physical_device, 
&create_info, nullptr, &device); - - return device;*/ -} -void Graphics::Vulkan::vulkanGetInstanceExtensions(Emu::VulkanExt* ext) { - /*u32 required_extensions_count = 0; - u32 available_extensions_count = 0; - u32 available_layers_count = 0; - ext->required_extensions = SDL_Vulkan_GetInstanceExtensions(&required_extensions_count); - ext->required_extensions_count = required_extensions_count; - - vkEnumerateInstanceExtensionProperties(nullptr, &available_extensions_count, nullptr); - - ext->available_extensions = std::vector(available_extensions_count); - - vkEnumerateInstanceExtensionProperties(nullptr, &available_extensions_count, - ext->available_extensions.data()); - - vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr); - ext->available_layers = std::vector(available_layers_count); - vkEnumerateInstanceLayerProperties(&available_layers_count, ext->available_layers.data()); - - // for (const char* ext : ext->required_extensions) { - // LOG_INFO(Render_Vulkan, "Vulkan required extension = {}", ext); - //} - - for (const auto& ext : ext->available_extensions) { - LOG_INFO(Render_Vulkan, "Vulkan available extension: {}, version = {}", ext.extensionName, - ext.specVersion); - } - - for (const auto& l : ext->available_layers) { - LOG_INFO(Render_Vulkan, - "Vulkan available layer: {}, specVersion = {}, implVersion = {}, {}", l.layerName, - l.specVersion, l.implementationVersion, l.description); - }*/ -} - -void Graphics::Vulkan::vulkanFindCompatiblePhysicalDevice( - VkInstance instance, VkSurfaceKHR surface, const std::vector& device_extensions, - Emu::VulkanSurfaceCapabilities* out_capabilities, VkPhysicalDevice* out_device, - Emu::VulkanQueues* out_queues) { - /*u32 count_devices = 0; - vkEnumeratePhysicalDevices(instance, &count_devices, nullptr); - - std::vector devices(count_devices); - vkEnumeratePhysicalDevices(instance, &count_devices, devices.data()); - - VkPhysicalDevice found_best_device = nullptr; - Emu::VulkanQueues found_best_queues; - - for (const auto& device : devices) { - VkPhysicalDeviceProperties device_properties{}; - VkPhysicalDeviceFeatures2 device_features2{}; - - vkGetPhysicalDeviceProperties(device, &device_properties); - vkGetPhysicalDeviceFeatures2(device, &device_features2); - if (device_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) { - continue; // we don't want integrated gpu for now .Later we will check the requirements - // and see what we can support (TODO fix me) - } - LOG_INFO(Render_Vulkan, "Vulkan device: {}", device_properties.deviceName); - - auto qs = vulkanFindQueues(device, surface); - - vulkanGetSurfaceCapabilities(device, surface, out_capabilities); - - found_best_device = device; - found_best_queues = qs; - } - *out_device = found_best_device; - *out_queues = found_best_queues;*/ -} - -Emu::VulkanQueues Graphics::Vulkan::vulkanFindQueues(VkPhysicalDevice device, - VkSurfaceKHR surface) { - Emu::VulkanQueues qs; - - /*u32 queue_family_count = 0; - vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, nullptr); - std::vector queue_families(queue_family_count); - vkGetPhysicalDeviceQueueFamilyProperties(device, &queue_family_count, queue_families.data()); - - qs.family_count = queue_family_count; - - u32 family = 0; - for (auto& f : queue_families) { - VkBool32 presentation_supported = VK_FALSE; - vkGetPhysicalDeviceSurfaceSupportKHR(device, family, surface, &presentation_supported); - - LOG_INFO(Render_Vulkan, "queue family: {}, count = {}, present = {}", - 
string_VkQueueFlags(f.queueFlags).c_str(), f.queueCount, - (presentation_supported == VK_TRUE ? "true" : "false")); - for (uint32_t i = 0; i < f.queueCount; i++) { - Emu::VulkanQueueInfo info; - info.family = family; - info.index = i; - info.is_graphics = (f.queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0; - info.is_compute = (f.queueFlags & VK_QUEUE_COMPUTE_BIT) != 0; - info.is_transfer = (f.queueFlags & VK_QUEUE_TRANSFER_BIT) != 0; - info.is_present = (presentation_supported == VK_TRUE); - - qs.available.push_back(info); - } - - qs.family_used.push_back(0); - - family++; - } - u32 index = 0; - for (u32 i = 0; i < VULKAN_QUEUE_GRAPHICS_NUM; i++) { - for (const auto& idx : qs.available) { - if (idx.is_graphics) { - qs.family_used[qs.available.at(index).family]++; - qs.graphics.push_back(qs.available.at(index)); - qs.available.erase(qs.available.begin() + index); - break; - } - index++; - } - } - index = 0; - for (u32 i = 0; i < VULKAN_QUEUE_COMPUTE_NUM; i++) { - for (const auto& idx : qs.available) { - if (idx.is_graphics) { - qs.family_used[qs.available.at(index).family]++; - qs.compute.push_back(qs.available.at(index)); - qs.available.erase(qs.available.begin() + index); - break; - } - index++; - } - } - index = 0; - for (uint32_t i = 0; i < VULKAN_QUEUE_TRANSFER_NUM; i++) { - for (const auto& idx : qs.available) { - if (idx.is_graphics) { - qs.family_used[qs.available.at(index).family]++; - qs.transfer.push_back(qs.available.at(index)); - qs.available.erase(qs.available.begin() + index); - break; - } - index++; - } - } - index = 0; - for (uint32_t i = 0; i < VULKAN_QUEUE_PRESENT_NUM; i++) { - for (const auto& idx : qs.available) { - if (idx.is_graphics) { - qs.family_used[qs.available.at(index).family]++; - qs.present.push_back(qs.available.at(index)); - qs.available.erase(qs.available.begin() + index); - break; - } - index++; - } - }*/ - return qs; -} - -void Graphics::Vulkan::vulkanGetSurfaceCapabilities(VkPhysicalDevice physical_device, - VkSurfaceKHR surface, - Emu::VulkanSurfaceCapabilities* surfaceCap) { - /*vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, - &surfaceCap->capabilities); - - uint32_t formats_count = 0; - vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &formats_count, nullptr); - - surfaceCap->formats = std::vector(formats_count); - vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &formats_count, - surfaceCap->formats.data()); - - uint32_t present_modes_count = 0; - vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &present_modes_count, - nullptr); - - surfaceCap->present_modes = std::vector(present_modes_count); - vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &present_modes_count, - surfaceCap->present_modes.data()); - - for (const auto& f : surfaceCap->formats) { - if (f.format == VK_FORMAT_B8G8R8A8_SRGB && - f.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { - surfaceCap->is_format_srgb_bgra32 = true; - break; - } - if (f.format == VK_FORMAT_B8G8R8A8_UNORM && - f.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { - surfaceCap->is_format_unorm_bgra32 = true; - break; - } - }*/ -} - -static void set_image_layout(VkCommandBuffer buffer, HLE::Libs::Graphics::VulkanImage* dst_image, - uint32_t base_level, uint32_t levels, VkImageAspectFlags aspect_mask, - VkImageLayout old_image_layout, VkImageLayout new_image_layout) { - VkImageMemoryBarrier imageMemoryBarrier{}; - imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - imageMemoryBarrier.pNext = nullptr; - 
imageMemoryBarrier.srcAccessMask = 0; - imageMemoryBarrier.dstAccessMask = 0; - imageMemoryBarrier.oldLayout = old_image_layout; - imageMemoryBarrier.newLayout = new_image_layout; - imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.image = dst_image->image; - imageMemoryBarrier.subresourceRange.aspectMask = aspect_mask; - imageMemoryBarrier.subresourceRange.baseMipLevel = base_level; - imageMemoryBarrier.subresourceRange.levelCount = levels; - imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; - imageMemoryBarrier.subresourceRange.layerCount = 1; - - if (old_image_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { - imageMemoryBarrier.srcAccessMask = 0; - } - - if (new_image_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - imageMemoryBarrier.dstAccessMask = 0; - } - - if (new_image_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { - imageMemoryBarrier.dstAccessMask = 0; - } - - if (old_image_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - imageMemoryBarrier.srcAccessMask = 0; - } - - if (old_image_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { - imageMemoryBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - } - - if (new_image_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - imageMemoryBarrier.srcAccessMask = 0; - imageMemoryBarrier.dstAccessMask = 0; - } - - if (new_image_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { - imageMemoryBarrier.dstAccessMask = 0; - } - - if (new_image_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { - imageMemoryBarrier.dstAccessMask = 0; - } - - VkPipelineStageFlags src_stages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - VkPipelineStageFlags dest_stages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - - // vkCmdPipelineBarrier(buffer, src_stages, dest_stages, 0, 0, nullptr, 0, nullptr, 1, - // &imageMemoryBarrier); - - dst_image->layout = new_image_layout; -} - -void Graphics::Vulkan::vulkanBlitImage(GPU::CommandBuffer* buffer, - HLE::Libs::Graphics::VulkanImage* src_image, - Emu::VulkanSwapchain* dst_swapchain) { - auto* vk_buffer = buffer->getPool()->buffers[buffer->getIndex()]; - - HLE::Libs::Graphics::VulkanImage swapchain_image(HLE::Libs::Graphics::VulkanImageType::Unknown); - - swapchain_image.image = dst_swapchain->swapchain_images[dst_swapchain->current_index]; - swapchain_image.layout = VK_IMAGE_LAYOUT_UNDEFINED; - - set_image_layout(vk_buffer, src_image, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - set_image_layout(vk_buffer, &swapchain_image, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - VkImageBlit region{}; - region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.srcSubresource.mipLevel = 0; - region.srcSubresource.baseArrayLayer = 0; - region.srcSubresource.layerCount = 1; - region.srcOffsets[0].x = 0; - region.srcOffsets[0].y = 0; - region.srcOffsets[0].z = 0; - region.srcOffsets[1].x = static_cast(src_image->extent.width); - region.srcOffsets[1].y = static_cast(src_image->extent.height); - region.srcOffsets[1].z = 1; - region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.dstSubresource.mipLevel = 0; - region.dstSubresource.baseArrayLayer = 0; - region.dstSubresource.layerCount = 1; - region.dstOffsets[0].x = 0; - region.dstOffsets[0].y = 0; - region.dstOffsets[0].z = 0; - region.dstOffsets[1].x = static_cast(dst_swapchain->swapchain_extent.width); - 
-    region.dstOffsets[1].y = static_cast<int32_t>(dst_swapchain->swapchain_extent.height);
-    region.dstOffsets[1].z = 1;
-
-    // vkCmdBlitImage(vk_buffer, src_image->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-    //                swapchain_image.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region,
-    //                VK_FILTER_LINEAR);
-
-    set_image_layout(vk_buffer, src_image, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT,
-                     VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
-}
-
-void Graphics::Vulkan::vulkanFillImage(HLE::Libs::Graphics::GraphicCtx* ctx,
-                                       HLE::Libs::Graphics::VulkanImage* dst_image,
-                                       const void* src_data, u64 size, u32 src_pitch,
-                                       u64 dst_layout) {
-    HLE::Libs::Graphics::VulkanBuffer staging_buffer{};
-    staging_buffer.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
-    staging_buffer.memory.property =
-        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-    vulkanCreateBuffer(ctx, size, &staging_buffer);
-
-    void* data = nullptr;
-    // vkMapMemory(ctx->m_device, staging_buffer.memory.memory, staging_buffer.memory.offset,
-    //             staging_buffer.memory.requirements.size, 0, &data);
-    // std::memcpy(data, src_data, size);
-    // vkUnmapMemory(ctx->m_device, staging_buffer.memory.memory);
-
-    GPU::CommandBuffer buffer(9);
-
-    buffer.begin();
-    vulkanBufferToImage(&buffer, &staging_buffer, src_pitch, dst_image, dst_layout);
-    buffer.end();
-    buffer.execute();
-    buffer.waitForFence();
-
-    vulkanDeleteBuffer(ctx, &staging_buffer);
-}
-
-void Graphics::Vulkan::vulkanBufferToImage(GPU::CommandBuffer* buffer,
-                                           HLE::Libs::Graphics::VulkanBuffer* src_buffer,
-                                           u32 src_pitch,
-                                           HLE::Libs::Graphics::VulkanImage* dst_image,
-                                           u64 dst_layout) {
-    auto* vk_buffer = buffer->getPool()->buffers[buffer->getIndex()];
-
-    set_image_layout(vk_buffer, dst_image, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT,
-                     VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
-
-    VkBufferImageCopy region{};
-    region.bufferOffset = 0;
-    region.bufferRowLength = (src_pitch != dst_image->extent.width ? src_pitch : 0);
-    region.bufferImageHeight = 0;
-
-    region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-    region.imageSubresource.mipLevel = 0;
-    region.imageSubresource.baseArrayLayer = 0;
-    region.imageSubresource.layerCount = 1;
-
-    region.imageOffset = {0, 0, 0};
-    region.imageExtent = {dst_image->extent.width, dst_image->extent.height, 1};
-
-    // vkCmdCopyBufferToImage(vk_buffer, src_buffer->buffer, dst_image->image,
-    //                        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
-
-    set_image_layout(vk_buffer, dst_image, 0, 1, VK_IMAGE_ASPECT_COLOR_BIT,
-                     VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<VkImageLayout>(dst_layout));
-}
-
-void Graphics::Vulkan::vulkanCreateBuffer(HLE::Libs::Graphics::GraphicCtx* ctx, u64 size,
-                                          HLE::Libs::Graphics::VulkanBuffer* buffer) {
-    VkBufferCreateInfo buffer_info{};
-    buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-    buffer_info.size = size;
-    buffer_info.usage = buffer->usage;
-    buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-
-    // vkCreateBuffer(ctx->m_device, &buffer_info, nullptr, &buffer->buffer);
-
-    // vkGetBufferMemoryRequirements(ctx->m_device, buffer->buffer, &buffer->memory.requirements);
-
-    bool allocated = GPU::vulkanAllocateMemory(ctx, &buffer->memory);
-    if (!allocated) {
-        fmt::print("Can't allocate Vulkan buffer memory\n");
-        std::exit(1);
-    }
-    // vkBindBufferMemory(ctx->m_device, buffer->buffer, buffer->memory.memory,
-    //                    buffer->memory.offset);
-}
-
-void Graphics::Vulkan::vulkanDeleteBuffer(HLE::Libs::Graphics::GraphicCtx* ctx,
-                                          HLE::Libs::Graphics::VulkanBuffer* buffer) {
-    // vkDestroyBuffer(ctx->m_device, buffer->buffer, nullptr);
-    // vkFreeMemory(ctx->m_device, buffer->memory.memory, nullptr);
-    buffer->memory.memory = nullptr;
-    buffer->buffer = nullptr;
-}
diff --git a/src/vulkan_util.h b/src/vulkan_util.h
deleted file mode 100644
index 18291a9d..00000000
--- a/src/vulkan_util.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <vector>
-#include <vulkan/vk_enum_string_helper.h>
-#include <vulkan/vulkan_core.h>
-#include "core/PS4/HLE/Graphics/graphics_render.h"
-#include "emulator.h"
-
-namespace Graphics::Vulkan {
-
-constexpr int VULKAN_QUEUES_NUM = 11; // Total of the counts below
-constexpr int VULKAN_QUEUE_GRAPHICS_NUM = 1;
-constexpr int VULKAN_QUEUE_TRANSFER_NUM = 1;
-constexpr int VULKAN_QUEUE_PRESENT_NUM = 1;
-constexpr int VULKAN_QUEUE_COMPUTE_NUM = 8;
-
-constexpr int VULKAN_QUEUE_GFX = 8;
-constexpr int VULKAN_QUEUE_UTIL = 9;
-constexpr int VULKAN_QUEUE_PRESENT = 10;
-
-void vulkanCreate(Emu::WindowCtx* ctx);
-void vulkanGetInstanceExtensions(Emu::VulkanExt* ext);
-void vulkanFindCompatiblePhysicalDevice(VkInstance instance, VkSurfaceKHR surface,
-                                        const std::vector<const char*>& device_extensions,
-                                        Emu::VulkanSurfaceCapabilities* out_capabilities,
-                                        VkPhysicalDevice* out_device,
-                                        Emu::VulkanQueues* out_queues);
-VkDevice vulkanCreateDevice(VkPhysicalDevice physical_device, VkSurfaceKHR surface,
-                            const Emu::VulkanExt* r, const Emu::VulkanQueues& queues,
-                            const std::vector<const char*>& device_extensions);
-Emu::VulkanQueues vulkanFindQueues(VkPhysicalDevice device, VkSurfaceKHR surface);
-void vulkanGetSurfaceCapabilities(VkPhysicalDevice physical_device, VkSurfaceKHR surface,
-                                  Emu::VulkanSurfaceCapabilities* surfaceCap);
-void vulkanCreateQueues(HLE::Libs::Graphics::GraphicCtx* ctx, const Emu::VulkanQueues& queues);
-Emu::VulkanSwapchain vulkanCreateSwapchain(HLE::Libs::Graphics::GraphicCtx* ctx, u32 image_count);
-void vulkanBlitImage(GPU::CommandBuffer* buffer,
-                     HLE::Libs::Graphics::VulkanImage* src_image,
-                     Emu::VulkanSwapchain* dst_swapchain);
-void vulkanFillImage(HLE::Libs::Graphics::GraphicCtx* ctx,
-                     HLE::Libs::Graphics::VulkanImage* dst_image, const void* src_data, u64 size,
-                     u32 src_pitch, u64 dst_layout);
-void vulkanBufferToImage(GPU::CommandBuffer* buffer, HLE::Libs::Graphics::VulkanBuffer* src_buffer,
-                         u32 src_pitch, HLE::Libs::Graphics::VulkanImage* dst_image,
-                         u64 dst_layout);
-void vulkanCreateBuffer(HLE::Libs::Graphics::GraphicCtx* ctx, u64 size,
-                        HLE::Libs::Graphics::VulkanBuffer* buffer);
-void vulkanDeleteBuffer(HLE::Libs::Graphics::GraphicCtx* ctx,
-                        HLE::Libs::Graphics::VulkanBuffer* buffer);
-} // namespace Graphics::Vulkan