From c4061c3b95a63892c58096f13b320acea5df2af8 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 29 Jun 2024 17:39:31 +0200 Subject: [PATCH 1/8] amdgpu: fix for wrong constant ram offset in `WriteConstRam` decoding --- src/video_core/amdgpu/pm4_cmds.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 88697d31..5b49157a 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -518,13 +518,14 @@ struct PM4CmdEventWriteEos { struct PM4WriteConstRam { PM4Type3Header header; union { - BitField<0, 16, u32> offset; // in DWs + BitField<0, 16, u32> offset; ///< Starting DW granularity offset into the constant RAM. + ///< Thus, bits[1:0] are zero. u32 dw1; }; u32 data[0]; [[nodiscard]] u32 Offset() const { - return offset.Value() << 2u; + return offset.Value(); } [[nodiscard]] u32 Size() const { From cc945fb92e589815c35d8598e7e8f688b678af64 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 29 Jun 2024 17:53:03 +0200 Subject: [PATCH 2/8] shader_recompiler: a simple bfs in image arg producer search --- .../ir/passes/resource_tracking_pass.cpp | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index ac419197..acb6ec18 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include + #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" @@ -250,11 +252,25 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { - IR::Inst* producer = 
inst.Arg(0).InstRecursive(); - ASSERT(producer->GetOpcode() == - IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler) - producer->GetOpcode() == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) - producer->GetOpcode() == IR::Opcode::GetUserData); + std::deque insts{&inst}; + const auto& pred = [](auto opcode) -> bool { + return (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler) + opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) + opcode == IR::Opcode::GetUserData); + }; + + IR::Inst* producer{}; + while (!insts.empty() && (producer = insts.front(), !pred(producer->GetOpcode()))) { + for (auto arg_idx = 0u; arg_idx < producer->NumArgs(); ++arg_idx) { + const auto arg = producer->Arg(arg_idx); + if (arg.TryInstRecursive()) { + insts.push_back(arg.InstRecursive()); + } + } + insts.pop_front(); + } + + ASSERT(pred(producer->GetOpcode())); const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair { if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) { return std::make_pair(producer->Arg(0).InstRecursive(), From 3663c60f3da4eb693ec1925eaa19fc3e5326ec55 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 29 Jun 2024 18:11:32 +0200 Subject: [PATCH 3/8] libraries: gnmdriver: fix for incorrect offset in `sceGnmDrawInitDefaultHardwareState175` --- src/core/libraries/gnmdriver/gnmdriver.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index c17dbce4..b0bb698c 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -664,9 +664,10 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) { cmdbuf = ClearContextState(cmdbuf); std::memcpy(cmdbuf, InitSequence175.data(), InitSequence175.size() * 4); + cmdbuf += InitSequence175.size(); - cmdbuf[0x7f] = 0xc07f1000; - cmdbuf[0x80] = 0; + constexpr auto cmdbuf_left = 
HwInitPacketSize - InitSequence175.size() - 0xc - 1; + WriteTrailingNop(cmdbuf); return HwInitPacketSize; } From 9121ecf123b2986ae68fdb51323627154047e25d Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 29 Jun 2024 20:23:04 +0200 Subject: [PATCH 4/8] renderer_vulkan: more formats conversion --- src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 6594aab9..a6d4d708 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -325,6 +325,18 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR16G16B16A16Sint; } + if (data_format == AmdGpu::DataFormat::Format16_16 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR16G16Sfloat; + } + if (data_format == AmdGpu::DataFormat::Format10_11_11 && + num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eB10G11R11UfloatPack32; + } + if (data_format == AmdGpu::DataFormat::Format2_10_10_10 && + num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eA2B10G10R10UnormPack32; + } if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Srgb) { return vk::Format::eBc7SrgbBlock; } @@ -490,6 +502,8 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples) { return vk::SampleCountFlagBits::e2; case 4: return vk::SampleCountFlagBits::e4; + case 8: + return vk::SampleCountFlagBits::e8; default: UNREACHABLE(); } From 2386b4672683aba89608fdd132389b2522b99820 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 29 Jun 2024 23:12:18 +0200 Subject: [PATCH 5/8] libraries: gnmdriver: `sceGnmDrawInitToDefaultContextState` and `sceGnmDrawInitToDefaultContextState400` implementation --- src/core/libraries/gnmdriver/gnmdriver.cpp | 73 
++++++++++++++++++++-- src/core/libraries/gnmdriver/gnmdriver.h | 4 +- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index b0bb698c..e87a0556 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -212,6 +212,53 @@ static constexpr std::array InitSequence350{ 0xc0016900u, 0x2aau, 0xffu, }; static_assert(InitSequence350.size() == 0x7c); + +static constexpr std::array CtxInitSequence{ + 0xc0012800u, 0x80000000u, 0x80000000u, + 0xc0001200u, 0u, + 0xc0002f00u, 1u, + 0xc0016900u, 0x102u, 0u, + 0xc0016900u, 0x202u, 0xcc0010u, + 0xc0111000u, 0u +}; +static_assert(CtxInitSequence.size() == 0x0f); + +static constexpr std::array CtxInitSequence400{ + 0xc0012800u, 0x80000000u, 0x80000000u, + 0xc0001200u, 0u, + 0xc0016900u, 0x2f9u, 0x2du, + 0xc0016900u, 0x282u, 8u, + 0xc0016900u, 0x280u, 0x80008u, + 0xc0016900u, 0x281u, 0xffff0000u, + 0xc0016900u, 0x204u, 0u, + 0xc0016900u, 0x206u, 0x43fu, + 0xc0016900u, 0x83u, 0xffffu, + 0xc0016900u, 0x317u, 0x10u, + 0xc0016900u, 0x2fau, 0x3f800000u, + 0xc0016900u, 0x2fcu, 0x3f800000u, + 0xc0016900u, 0x2fbu, 0x3f800000u, + 0xc0016900u, 0x2fdu, 0x3f800000u, + 0xc0016900u, 0x202u, 0xcc0010u, + 0xc0016900u, 0x30eu, 0xffffffffu, + 0xc0016900u, 0x30fu, 0xffffffffu, + 0xc0002f00u, 1u, + 0xc0016900u, 0x1b1u, 2u, + 0xc0016900u, 0x101u, 0u, + 0xc0016900u, 0x100u, 0xffffffffu, + 0xc0016900u, 0x103u, 0u, + 0xc0016900u, 0x284u, 0u, + 0xc0016900u, 0x290u, 0u, + 0xc0016900u, 0x2aeu, 0u, + 0xc0016900u, 0x102u, 0u, + 0xc0016900u, 0x292u, 0u, + 0xc0016900u, 0x293u, 0x6020000u, + 0xc0016900u, 0x2f8u, 0u, + 0xc0016900u, 0x2deu, 0x1e9u, + 0xc0036900u, 0x295u, 0x100u, 0x100u, 4u, + 0xc0016900u, 0x2aau, 0xffu, + 0xc09e1000u, +}; +static_assert(CtxInitSequence400.size() == 0x61); // clang-format on // In case if `submitDone` is issued we need to block submissions until GPU idle @@ -724,14 +771,28 @@ u32 
PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) { return SetupContext350(cmdbuf, size, true); } -int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size) { + LOG_TRACE(Lib_GnmDriver, "called"); + + constexpr auto CtxInitPacketSize = 0x20u; + if (size != CtxInitPacketSize) { + return 0; + } + + std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4); + return CtxInitPacketSize; } -int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size) { + LOG_TRACE(Lib_GnmDriver, "called"); + + constexpr auto CtxInitPacketSize = 0x100u; + if (size != CtxInitPacketSize) { + return 0; + } + + std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4); + return CtxInitPacketSize; } int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 915775d5..2971d66b 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -60,8 +60,8 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size); u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size); u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size); u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size); -int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(); -int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(); +u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size); +u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size); int PS4_SYSV_ABI sceGnmDrawOpaqueAuto(); int PS4_SYSV_ABI sceGnmDriverCaptureInProgress(); int 
PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface(); From 14377b39b51fdd334fde4b609dd9e5b9876d7bc3 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 30 Jun 2024 15:54:59 +0200 Subject: [PATCH 6/8] texture_cache: detiler: added missing micro8x2 --- src/video_core/host_shaders/CMakeLists.txt | 1 + src/video_core/host_shaders/detile_m8x2.comp | 61 +++++++++++++++++++ src/video_core/texture_cache/tile_manager.cpp | 10 ++- src/video_core/texture_cache/tile_manager.h | 1 + 4 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 src/video_core/host_shaders/detile_m8x2.comp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index f9b948c3..f2b6cc2d 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -3,6 +3,7 @@ set(SHADER_FILES detile_m8x1.comp + detile_m8x2.comp detile_m32x1.comp detile_m32x2.comp detile_m32x4.comp diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp new file mode 100644 index 00000000..d93f9a7f --- /dev/null +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +// NOTE: Current subgroup utilization is suboptimal on most GPUs, so +// it will be nice to process two tiles at once here. 
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 0) buffer input_buf { + uint in_data[]; +}; +layout(rg8ui, binding = 1) uniform writeonly uimage2D output_img; + +layout(push_constant) uniform image_info { + uint pitch; +} info; + +#define MICRO_TILE_DIM 8 +#define TEXELS_PER_ELEMENT 2 + +// Inverse morton LUT, small enough to fit into K$ +uint rmort[16] = { + 0x11011000, 0x31213020, + 0x13031202, 0x33233222, + 0x51415040, 0x71617060, + 0x53435242, 0x73637262, + + 0x15051404, 0x35253424, + 0x17071606, 0x37273626, + 0x55455444, 0x75657464, + 0x57475646, 0x77677666, +}; + +void main() { + uint src_tx = in_data[gl_GlobalInvocationID.x]; + uint p[TEXELS_PER_ELEMENT] = { + (src_tx >> 16) & 0xffff, + src_tx & 0xffff + }; + + uint bit_ofs = 8 * TEXELS_PER_ELEMENT * (gl_LocalInvocationID.x % 2); // 2 invocations share one LUT word, so the shift is 0 or 16 (a shift >= 32 is undefined in GLSL) + uint packed_pos = rmort[gl_LocalInvocationID.x >> 1] >> bit_ofs; + uint col = bitfieldExtract(packed_pos, 4, 4); + uint row = bitfieldExtract(packed_pos, 0, 4); + + uint tiles_per_pitch = info.pitch >> 3; // log2(MICRO_TILE_DIM) + uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch; + uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch; + uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col; + uint dw_ofs_y = target_tile_y * MICRO_TILE_DIM + row; + + ivec2 img_pos = ivec2(dw_ofs_x, dw_ofs_y); + + #pragma unroll + for (int ofs = 0; ofs < TEXELS_PER_ELEMENT; ++ofs) { + uint p0 = (p[ofs] >> 8) & 0xff; + uint p1 = p[ofs] & 0xff; + imageStore(output_img, img_pos + ivec2(ofs, 0), uvec4(p0, p1, 0, 0)); + } +} diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index b2ff753b..0b6fd0eb 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -12,6 +12,7 @@ #include "video_core/host_shaders/detile_m32x2_comp.h" #include "video_core/host_shaders/detile_m32x4_comp.h" #include "video_core/host_shaders/detile_m8x1_comp.h" 
+#include "video_core/host_shaders/detile_m8x2_comp.h" #include #include @@ -177,6 +178,8 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { case vk::Format::eR8Unorm: return vk::Format::eR8Uint; + case vk::Format::eR8G8Unorm: + return vk::Format::eR8G8Uint; case vk::Format::eR8G8B8A8Srgb: [[fallthrough]]; case vk::Format::eB8G8R8A8Srgb: @@ -207,6 +210,8 @@ const DetilerContext* TileManager::GetDetiler(const Image& image) const { switch (format) { case vk::Format::eR8Uint: return &detilers[DetilerType::Micro8x1]; + case vk::Format::eR8G8Uint: + return &detilers[DetilerType::Micro8x2]; case vk::Format::eR32Uint: return &detilers[DetilerType::Micro32x1]; case vk::Format::eR32G32Uint: @@ -229,9 +234,8 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc staging{instance, scheduler, StagingFlags, 64_MB, Vulkan::BufferType::Upload} { static const std::array detiler_shaders{ - HostShaders::DETILE_M8X1_COMP, - HostShaders::DETILE_M32X1_COMP, - HostShaders::DETILE_M32X2_COMP, + HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, + HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, HostShaders::DETILE_M32X4_COMP, }; diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 3a74de22..98a33786 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -19,6 +19,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format); enum DetilerType : u32 { Micro8x1, + Micro8x2, Micro32x1, Micro32x2, Micro32x4, From 1f55eff9d884e4fd15b51ea5862b1536720288eb Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 30 Jun 2024 18:22:39 +0200 Subject: [PATCH 7/8] libraries: gnmdriver: added initialization preamble to every first submit in a frame --- src/core/libraries/gnmdriver/gnmdriver.cpp | 22 ++++++++++++++++++- src/core/libraries/kernel/libkernel.cpp | 2 +- src/core/libraries/kernel/libkernel.h | 1 + 
src/emulator.cpp | 8 +++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 ++--- 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index e87a0556..40b1f897 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -8,6 +8,7 @@ #include "common/slot_vector.h" #include "core/libraries/error_codes.h" #include "core/libraries/gnmdriver/gnmdriver.h" +#include "core/libraries/kernel/libkernel.h" #include "core/libraries/libs.h" #include "core/libraries/videoout/video_out.h" #include "core/platform.h" @@ -264,7 +265,9 @@ static_assert(CtxInitSequence400.size() == 0x61); // In case if `submitDone` is issued we need to block submissions until GPU idle static u32 submission_lock{}; static std::mutex m_submission{}; -static u64 frames_submitted{}; // frame counter +static u64 frames_submitted{}; // frame counter +static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame +static int sdk_version{0}; struct AscQueueInfo { VAddr map_addr; @@ -1935,6 +1938,17 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ submission_lock = 0; } + if (send_init_packet) { + if (sdk_version <= 0x1ffffffu) { + liverpool->SubmitGfx(InitSequence, {}); + } else if (sdk_version <= 0x3ffffffu) { + liverpool->SubmitGfx(InitSequence200, {}); + } else { + liverpool->SubmitGfx(InitSequence350, {}); + } + send_init_packet = false; + } + for (auto cbpair = 0u; cbpair < count; ++cbpair) { const auto* ccb = ccb_gpu_addrs ? ccb_gpu_addrs[cbpair] : nullptr; const auto ccb_size_in_bytes = ccb_sizes_in_bytes ? 
ccb_sizes_in_bytes[cbpair] : 0; @@ -1977,6 +1991,7 @@ int PS4_SYSV_ABI sceGnmSubmitDone() { submission_lock = true; } liverpool->NotifySubmitDone(); + send_init_packet = true; ++frames_submitted; return ORBIS_OK; } @@ -2450,6 +2465,11 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) { liverpool = std::make_unique(); renderer = std::make_unique(*g_window, liverpool.get()); + const int result = sceKernelGetCompiledSdkVersion(&sdk_version); + if (result != ORBIS_OK) { + sdk_version = 0; + } + LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent); LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAreSubmitsAllowed); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index e7e7d11d..fce0d621 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -154,7 +154,7 @@ int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) { int version = param_sfo->GetInteger("SYSTEM_VER"); LOG_INFO(Kernel, "returned system version = {:#x}", version); *ver = version; - return ORBIS_OK; + return (version > 0) ? ORBIS_OK : ORBIS_KERNEL_ERROR_EINVAL; } s64 PS4_SYSV_ABI ps4__read(int d, void* buf, u64 nbytes) { diff --git a/src/core/libraries/kernel/libkernel.h b/src/core/libraries/kernel/libkernel.h index 0cc6b0b2..a33c6a71 100644 --- a/src/core/libraries/kernel/libkernel.h +++ b/src/core/libraries/kernel/libkernel.h @@ -28,6 +28,7 @@ typedef struct { } OrbisKernelUuid; int* PS4_SYSV_ABI __Error(); +int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver); void LibKernel_Register(Core::Loader::SymbolsResolver* sym); diff --git a/src/emulator.cpp b/src/emulator.cpp index 77ba91f0..09be9273 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -42,10 +42,6 @@ Emulator::Emulator() : window{WindowWidth, WindowHeight, controller} { // Start logger. 
Common::Log::Initialize(); Common::Log::Start(); - - // Initialize kernel and library facilities. - Libraries::Kernel::init_pthreads(); - Libraries::InitHLELibs(&linker->GetHLESymbols()); } Emulator::~Emulator() { @@ -93,6 +89,10 @@ void Emulator::Run(const std::filesystem::path& file) { const auto& mount_temp_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id; mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir + // Initialize kernel and library facilities. + Libraries::Kernel::init_pthreads(); + Libraries::InitHLELibs(&linker->GetHLESymbols()); + // Load the module with the linker linker->LoadModule(file); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a6d4b770..71a09ea2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -122,9 +122,8 @@ void PipelineCache::RefreshGraphicsKey() { key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); } - // TODO: Should be a check for `OperationMode::Disable` once we emulate HW state init packet - // sent by system software. - const auto skip_cb_binding = false; + const auto skip_cb_binding = + regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. 
From f03262421e3fac5203e27eab3bce7961cfed5616 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 1 Jul 2024 09:58:52 +0200 Subject: [PATCH 8/8] texture_cache: force storage usage bit to all images --- src/video_core/texture_cache/image.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 750cc437..b5a6a0d4 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -91,9 +91,13 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) { usage |= vk::ImageUsageFlagBits::eColorAttachment; } } - if (info.is_tiled || info.usage.storage) { - usage |= vk::ImageUsageFlagBits::eStorage; - } + + // In cases where an image is created as a render/depth target and cleared with compute, + // we cannot predict whether it will be used as a storage image. A proper solution would + // involve re-creating the resource with a new configuration and copying previous content into + // it. However, for now, we will set storage usage for all images (if the format allows), + // sacrificing a bit of performance. Note use of ExtendedUsage flag set by default. 
+ usage |= vk::ImageUsageFlagBits::eStorage; return usage; } @@ -217,7 +221,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { - vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat}; + vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | + vk::ImageCreateFlagBits::eExtendedUsage}; if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 && info.size.width == info.size.height) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; @@ -225,11 +230,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, if (info.type == vk::ImageType::e3D) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } - if (info.is_tiled) { - flags |= vk::ImageCreateFlagBits::eExtendedUsage; - if (info.IsBlockCoded()) { - flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible; - } + if (info.IsBlockCoded()) { + flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible; } usage = ImageUsageFlags(info);