Merge pull request #80 from shadps4-emu/linux
common: Rework timekeeping with native RDTSC and port to linux
This commit is contained in:
commit
32a5ff15bb
|
@ -33,11 +33,10 @@
|
||||||
path = third-party/toml11
|
path = third-party/toml11
|
||||||
url = https://github.com/ToruNiina/toml11
|
url = https://github.com/ToruNiina/toml11
|
||||||
branch = master
|
branch = master
|
||||||
[submodule "third-party/vulkan"]
|
|
||||||
path = third-party/vulkan
|
|
||||||
url = https://github.com/shadps4/vulkan.git
|
|
||||||
branch = main
|
|
||||||
[submodule "third-party/xxHash"]
|
[submodule "third-party/xxHash"]
|
||||||
path = third-party/xxHash
|
path = third-party/xxHash
|
||||||
url = https://github.com/Cyan4973/xxHash.git
|
url = https://github.com/Cyan4973/xxHash.git
|
||||||
branch = dev
|
branch = dev
|
||||||
|
[submodule "third-party/vulkan"]
|
||||||
|
path = third-party/vulkan
|
||||||
|
url = https://github.com/GPUCode/vulkan
|
||||||
|
|
|
@ -30,8 +30,8 @@ endfunction()
|
||||||
add_subdirectory(third-party)
|
add_subdirectory(third-party)
|
||||||
include_directories(src)
|
include_directories(src)
|
||||||
|
|
||||||
set(LIBC_SOURCES src/core/hle/libraries/libc/Libc.cpp
|
set(LIBC_SOURCES src/core/hle/libraries/libc/libc.cpp
|
||||||
src/core/hle/libraries/libc/Libc.h
|
src/core/hle/libraries/libc/libc.h
|
||||||
src/core/hle/libraries/libc/printf.h
|
src/core/hle/libraries/libc/printf.h
|
||||||
src/core/hle/libraries/libc/va_ctx.h
|
src/core/hle/libraries/libc/va_ctx.h
|
||||||
src/core/hle/libraries/libc/libc_cxa.cpp
|
src/core/hle/libraries/libc/libc_cxa.cpp
|
||||||
|
@ -77,12 +77,15 @@ add_executable(shadps4
|
||||||
src/common/fs_file.h
|
src/common/fs_file.h
|
||||||
src/common/log.cpp
|
src/common/log.cpp
|
||||||
src/common/log.h
|
src/common/log.h
|
||||||
|
src/common/native_clock.cpp
|
||||||
|
src/common/native_clock.h
|
||||||
|
src/common/rdtsc.cpp
|
||||||
|
src/common/rdtsc.h
|
||||||
src/common/singleton.h
|
src/common/singleton.h
|
||||||
src/common/string_util.cpp
|
src/common/string_util.cpp
|
||||||
src/common/string_util.h
|
src/common/string_util.h
|
||||||
src/common/timer.cpp
|
|
||||||
src/common/timer.h
|
|
||||||
src/common/types.h
|
src/common/types.h
|
||||||
|
src/common/uint128.h
|
||||||
src/common/version.h
|
src/common/version.h
|
||||||
${LIBC_SOURCES}
|
${LIBC_SOURCES}
|
||||||
${USERSERVICE_SOURCES}
|
${USERSERVICE_SOURCES}
|
||||||
|
@ -143,8 +146,6 @@ add_executable(shadps4
|
||||||
src/core/PS4/HLE/Graphics/graphics_render.h
|
src/core/PS4/HLE/Graphics/graphics_render.h
|
||||||
src/core/PS4/GPU/tile_manager.cpp
|
src/core/PS4/GPU/tile_manager.cpp
|
||||||
src/core/PS4/GPU/tile_manager.h
|
src/core/PS4/GPU/tile_manager.h
|
||||||
src/emuTimer.cpp
|
|
||||||
src/emuTimer.h
|
|
||||||
src/core/hle/libraries/libkernel/time_management.cpp
|
src/core/hle/libraries/libkernel/time_management.cpp
|
||||||
src/core/hle/libraries/libkernel/time_management.h
|
src/core/hle/libraries/libkernel/time_management.h
|
||||||
"src/common/io_file.cpp" "src/common/io_file.h")
|
"src/common/io_file.cpp" "src/common/io_file.h")
|
||||||
|
@ -154,13 +155,15 @@ create_target_directory_groups(shadps4)
|
||||||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt spdlog::spdlog toml11::toml11)
|
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt spdlog::spdlog toml11::toml11)
|
||||||
target_link_libraries(shadps4 PRIVATE discord-rpc imgui SDL3-shared vulkan-1 xxhash Zydis)
|
target_link_libraries(shadps4 PRIVATE discord-rpc imgui SDL3-shared vulkan-1 xxhash Zydis)
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
target_link_libraries(shadps4 PRIVATE mincore winpthread)
|
target_link_libraries(shadps4 PRIVATE mincore winpthread clang_rt.builtins-x86_64.lib)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_custom_command(TARGET shadps4 POST_BUILD
|
add_custom_command(TARGET shadps4 POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
$<TARGET_FILE:SDL3-shared>
|
$<TARGET_FILE:SDL3-shared>
|
||||||
$<TARGET_FILE_DIR:shadps4>)
|
$<TARGET_FILE_DIR:shadps4>)
|
||||||
|
if (WIN32)
|
||||||
add_custom_command(TARGET shadps4 POST_BUILD
|
add_custom_command(TARGET shadps4 POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
"${PROJECT_SOURCE_DIR}/third-party/winpthread/bin/libwinpthread-1.dll" $<TARGET_FILE_DIR:shadps4>)
|
"${PROJECT_SOURCE_DIR}/third-party/winpthread/bin/libwinpthread-1.dll" $<TARGET_FILE_DIR:shadps4>)
|
||||||
|
endif()
|
||||||
|
|
|
@ -109,7 +109,11 @@ int Init(bool use_stdout) {
|
||||||
if (use_stdout) {
|
if (use_stdout) {
|
||||||
sinks.push_back(std::make_shared<spdlog::sinks::stdout_color_sink_mt>());
|
sinks.push_back(std::make_shared<spdlog::sinks::stdout_color_sink_mt>());
|
||||||
}
|
}
|
||||||
|
#ifdef _WIN64
|
||||||
sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>(L"shadps4.txt", true));
|
sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>(L"shadps4.txt", true));
|
||||||
|
#else
|
||||||
|
sinks.push_back(std::make_shared<spdlog::sinks::basic_file_sink_mt>("shadps4.txt", true));
|
||||||
|
#endif
|
||||||
spdlog::set_default_logger(std::make_shared<spdlog::logger>("shadps4 logger", begin(sinks), end(sinks)));
|
spdlog::set_default_logger(std::make_shared<spdlog::logger>("shadps4 logger", begin(sinks), end(sinks)));
|
||||||
auto f = std::make_unique<spdlog::pattern_formatter>("%^|%L|: %v%$", spdlog::pattern_time_type::local, std::string("")); // disable eol
|
auto f = std::make_unique<spdlog::pattern_formatter>("%^|%L|: %v%$", spdlog::pattern_time_type::local, std::string("")); // disable eol
|
||||||
spdlog::set_formatter(std::move(f));
|
spdlog::set_formatter(std::move(f));
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/uint128.h"
|
||||||
|
#include "common/native_clock.h"
|
||||||
|
#include "common/rdtsc.h"
|
||||||
|
#ifdef _WIN64
|
||||||
|
#include <pthread_time.h>
|
||||||
|
#else
|
||||||
|
#include <time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
// Estimates the TSC frequency once, then derives the fixed-point factors
// (unit denominator * 2^64 / frequency) used to turn raw ticks into
// nanoseconds, microseconds and milliseconds.
NativeClock::NativeClock()
    : rdtsc_frequency(EstimateRDTSCFrequency()),
      ns_rdtsc_factor(GetFixedPoint64Factor(std::nano::den, rdtsc_frequency)),
      us_rdtsc_factor(GetFixedPoint64Factor(std::micro::den, rdtsc_frequency)),
      ms_rdtsc_factor(GetFixedPoint64Factor(std::milli::den, rdtsc_frequency)) {}
|
||||||
|
|
||||||
|
// Current TSC reading scaled to nanoseconds with the precomputed factor.
u64 NativeClock::GetTimeNS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, ns_rdtsc_factor);
}
|
||||||
|
|
||||||
|
// Current TSC reading scaled to microseconds with the precomputed factor.
u64 NativeClock::GetTimeUS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, us_rdtsc_factor);
}
|
||||||
|
|
||||||
|
// Current TSC reading scaled to milliseconds with the precomputed factor.
u64 NativeClock::GetTimeMS() const {
    const u64 ticks = GetUptime();
    return MultiplyHigh(ticks, ms_rdtsc_factor);
}
|
||||||
|
|
||||||
|
// Raw, serialized time-stamp counter value (in TSC ticks, not a time unit).
u64 NativeClock::GetUptime() const {
    const u64 tsc_now = FencedRDTSC();
    return tsc_now;
}
|
||||||
|
|
||||||
|
// Returns the CLOCK_PROCESS_CPUTIME_ID reading converted to microseconds.
// Returns 0 when the clock cannot be read, instead of converting an
// uninitialized timespec.
u64 NativeClock::GetProcessTimeUS() const {
    timespec cpu_time{};
    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu_time) != 0) {
        return 0;
    }
    // Widen to u64 before scaling so the seconds term is never computed in a
    // narrower or signed time_t.
    const u64 whole_seconds_us = static_cast<u64>(cpu_time.tv_sec) * 1000000ULL;
    const u64 fraction_us = static_cast<u64>(cpu_time.tv_nsec) / 1000ULL;
    return whole_seconds_us + fraction_us;
}
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -0,0 +1,32 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
class NativeClock final {
|
||||||
|
public:
|
||||||
|
explicit NativeClock();
|
||||||
|
|
||||||
|
u64 GetTscFrequency() const {
|
||||||
|
return rdtsc_frequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 GetTimeNS() const;
|
||||||
|
u64 GetTimeUS() const;
|
||||||
|
u64 GetTimeMS() const;
|
||||||
|
u64 GetUptime() const;
|
||||||
|
u64 GetProcessTimeUS() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
u64 rdtsc_frequency;
|
||||||
|
u64 ns_rdtsc_factor;
|
||||||
|
u64 us_rdtsc_factor;
|
||||||
|
u64 ms_rdtsc_factor;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -0,0 +1,60 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
#include "common/rdtsc.h"
|
||||||
|
#include "common/uint128.h"
|
||||||
|
|
||||||
|
#ifdef _WIN64
|
||||||
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
static constexpr size_t SecondToNanoseconds = 1000000000ULL;
|
||||||
|
|
||||||
|
template <u64 Nearest>
|
||||||
|
static u64 RoundToNearest(u64 value) {
|
||||||
|
const auto mod = value % Nearest;
|
||||||
|
return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads the system real-time clock and returns it in nanoseconds.
static u64 GetTimeNs() {
#ifdef _WIN64
    // GetSystemTimePreciseAsFileTime reports the time in 100 ns units.
    static constexpr u64 NsPerFiletimeUnit = 100;
    // Offset that converts the Windows epoch to the Unix epoch.
    static constexpr u64 EpochOffset = 0x19DB1DED53E8000LL;
    FILETIME now;
    GetSystemTimePreciseAsFileTime(&now);
    const u64 upper = static_cast<u64>(now.dwHighDateTime) << 32;
    const u64 lower = static_cast<u64>(now.dwLowDateTime);
    return NsPerFiletimeUnit * (upper + lower - EpochOffset);
#elif defined(__APPLE__)
    return clock_gettime_nsec_np(CLOCK_REALTIME);
#else
    timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    return now.tv_sec * SecondToNanoseconds + now.tv_nsec;
#endif
}
|
||||||
|
|
||||||
|
// Estimates the TSC frequency (ticks per second) by comparing the TSC against
// the wall clock across a 100 ms sleep, rounded to the nearest 100 kHz.
u64 EstimateRDTSCFrequency() {
    // Warm-up: the first readings are thrown away.
    FencedRDTSC();
    std::this_thread::sleep_for(std::chrono::milliseconds{1});
    FencedRDTSC();

    // Sample wall clock and TSC on both sides of the measurement window.
    const u64 wall_begin_ns = GetTimeNs();
    const u64 tsc_begin = FencedRDTSC();
    std::this_thread::sleep_for(std::chrono::milliseconds{100});
    const u64 wall_end_ns = GetTimeNs();
    const u64 tsc_finish = FencedRDTSC();

    // ticks / elapsed_ns * 1e9 == ticks per second.
    const u64 elapsed_ticks = tsc_finish - tsc_begin;
    const u64 elapsed_ns = wall_end_ns - wall_begin_ns;
    const u64 ticks_per_second = MultiplyAndDivide64(elapsed_ticks, 1000000000ULL, elapsed_ns);
    return RoundToNearest<100'000>(ticks_per_second);
}
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -0,0 +1,37 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
__forceinline static u64 FencedRDTSC() {
|
||||||
|
_mm_lfence();
|
||||||
|
_ReadWriteBarrier();
|
||||||
|
const u64 result = __rdtsc();
|
||||||
|
_mm_lfence();
|
||||||
|
_ReadWriteBarrier();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline u64 FencedRDTSC() {
|
||||||
|
u64 eax;
|
||||||
|
u64 edx;
|
||||||
|
asm volatile("lfence\n\t"
|
||||||
|
"rdtsc\n\t"
|
||||||
|
"lfence\n\t"
|
||||||
|
: "=a"(eax), "=d"(edx));
|
||||||
|
return (edx << 32) | eax;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
u64 EstimateRDTSCFrequency();
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -1,108 +0,0 @@
|
||||||
#include "common/timer.h"
|
|
||||||
|
|
||||||
#ifdef _WIN64
|
|
||||||
#include <windows.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace Common {
|
|
||||||
|
|
||||||
// Caches the performance-counter frequency for later tick conversions.
Timer::Timer() {
#ifdef _WIN64
    LARGE_INTEGER frequency;
    QueryPerformanceFrequency(&frequency);
    m_Frequency = frequency.QuadPart;
#else
#error Unimplemented Timer constructor
#endif
}
|
|
||||||
|
|
||||||
// Records the current counter value as the start point and marks the timer
// as running.
void Timer::Start() {
#ifdef _WIN64
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    m_StartTime = counter.QuadPart;
#else
#error Unimplemented Timer::Start()
#endif
    m_is_timer_paused = false;
}
|
|
||||||
|
|
||||||
// Records the current counter value as the pause point and marks the timer
// as paused.
void Timer::Pause() {
#ifdef _WIN64
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    m_PauseTime = counter.QuadPart;
#else
#error Unimplemented Timer::Pause()
#endif
    m_is_timer_paused = true;
}
|
|
||||||
|
|
||||||
void Timer::Resume() {
|
|
||||||
u64 current_time = 0;
|
|
||||||
#ifdef _WIN64
|
|
||||||
LARGE_INTEGER c;
|
|
||||||
QueryPerformanceCounter(&c);
|
|
||||||
current_time = c.QuadPart;
|
|
||||||
#else
|
|
||||||
#error Unimplemented Timer::Resume()
|
|
||||||
#endif
|
|
||||||
m_StartTime += current_time - m_PauseTime;
|
|
||||||
m_is_timer_paused = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
double Timer::GetTimeMsec() const {
|
|
||||||
if (m_is_timer_paused) {
|
|
||||||
return 1000.0 * (static_cast<double>(m_PauseTime - m_StartTime)) / static_cast<double>(m_Frequency);
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 current_time = 0;
|
|
||||||
#ifdef _WIN64
|
|
||||||
LARGE_INTEGER c;
|
|
||||||
QueryPerformanceCounter(&c);
|
|
||||||
current_time = c.QuadPart;
|
|
||||||
#else
|
|
||||||
#error Unimplemented Timer::GetTimeMsec()
|
|
||||||
#endif
|
|
||||||
return 1000.0 * (static_cast<double>(current_time - m_StartTime)) / static_cast<double>(m_Frequency);
|
|
||||||
}
|
|
||||||
|
|
||||||
double Timer::GetTimeSec() const {
|
|
||||||
if (m_is_timer_paused) {
|
|
||||||
return (static_cast<double>(m_PauseTime - m_StartTime)) / static_cast<double>(m_Frequency);
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 current_time = 0;
|
|
||||||
#ifdef _WIN64
|
|
||||||
LARGE_INTEGER c;
|
|
||||||
QueryPerformanceCounter(&c);
|
|
||||||
current_time = c.QuadPart;
|
|
||||||
#else
|
|
||||||
#error Unimplemented Timer::GetTimeSec()
|
|
||||||
#endif
|
|
||||||
return (static_cast<double>(current_time - m_StartTime)) / static_cast<double>(m_Frequency);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Elapsed performance-counter ticks. While paused, the span ends at the pause
// point; otherwise it ends at the current counter value.
u64 Timer::GetTicks() const {
    u64 end_time = m_PauseTime;
    if (!m_is_timer_paused) {
#ifdef _WIN64
        LARGE_INTEGER counter;
        QueryPerformanceCounter(&counter);
        end_time = counter.QuadPart;
#else
#error Unimplemented Timer::GetTicks()
#endif
    }
    return end_time - m_StartTime;
}
|
|
||||||
|
|
||||||
u64 Timer::getQueryPerformanceCounter() {
|
|
||||||
LARGE_INTEGER c;
|
|
||||||
QueryPerformanceCounter(&c);
|
|
||||||
return c.QuadPart;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Common
|
|
|
@ -1,43 +0,0 @@
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "common/types.h"
|
|
||||||
|
|
||||||
namespace Common {
|
|
||||||
|
|
||||||
class Timer final {
|
|
||||||
public:
|
|
||||||
Timer();
|
|
||||||
~Timer() = default;
|
|
||||||
|
|
||||||
void Start();
|
|
||||||
void Pause();
|
|
||||||
void Resume();
|
|
||||||
|
|
||||||
bool IsPaused() const {
|
|
||||||
return m_is_timer_paused;
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 GetFrequency() const {
|
|
||||||
return m_Frequency;
|
|
||||||
}
|
|
||||||
|
|
||||||
double GetTimeMsec() const;
|
|
||||||
double GetTimeSec() const;
|
|
||||||
u64 GetTicks() const;
|
|
||||||
|
|
||||||
[[nodiscard]] static u64 getQueryPerformanceCounter();
|
|
||||||
|
|
||||||
public:
|
|
||||||
Timer(const Timer&) = delete;
|
|
||||||
Timer& operator=(const Timer&) = delete;
|
|
||||||
Timer(Timer&&) = delete;
|
|
||||||
Timer& operator=(Timer&&) = delete;
|
|
||||||
|
|
||||||
private:
|
|
||||||
bool m_is_timer_paused = true;
|
|
||||||
u64 m_Frequency{};
|
|
||||||
u64 m_StartTime{};
|
|
||||||
u64 m_PauseTime{};
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Common
|
|
|
@ -1,5 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
using s08 = std::int8_t;
|
using s08 = std::int8_t;
|
||||||
|
@ -15,9 +16,12 @@ using u64 = std::uint64_t;
|
||||||
using f32 = float;
|
using f32 = float;
|
||||||
using f64 = double;
|
using f64 = double;
|
||||||
|
|
||||||
|
using u128 = std::array<std::uint64_t, 2>;
|
||||||
|
static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
|
||||||
|
|
||||||
#define PS4_SYSV_ABI __attribute__((sysv_abi))
|
#define PS4_SYSV_ABI __attribute__((sysv_abi))
|
||||||
|
|
||||||
// UDLs for memory size values
|
// UDLs for memory size values
|
||||||
constexpr u64 operator""_KB(u64 x) { return 1024ULL * x; }
|
constexpr unsigned long long operator""_KB(unsigned long long x) { return 1024ULL * x; }
|
||||||
constexpr u64 operator""_MB(u64 x) { return 1024_KB * x; }
|
constexpr unsigned long long operator""_MB(unsigned long long x) { return 1024_KB * x; }
|
||||||
constexpr u64 operator""_GB(u64 x) { return 1024_MB * x; }
|
constexpr unsigned long long operator""_GB(unsigned long long x) { return 1024_MB * x; }
|
||||||
|
|
|
@ -0,0 +1,115 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#ifndef __clang__
|
||||||
|
#define HAS_INTRINSICS
|
||||||
|
#include <intrin.h>
|
||||||
|
#pragma intrinsic(__umulh)
|
||||||
|
#pragma intrinsic(_umul128)
|
||||||
|
#pragma intrinsic(_udiv128)
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#include <cstring>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
// This function multiplies 2 u64 values and divides it by a u64 value.
|
||||||
|
[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
|
||||||
|
#ifdef HAS_INTRINSICS
|
||||||
|
u128 r{};
|
||||||
|
r[0] = _umul128(a, b, &r[1]);
|
||||||
|
u64 remainder;
|
||||||
|
return _udiv128(r[1], r[0], d, &remainder);
|
||||||
|
#else
|
||||||
|
const u64 diva = a / d;
|
||||||
|
const u64 moda = a % d;
|
||||||
|
const u64 divb = b / d;
|
||||||
|
const u64 modb = b % d;
|
||||||
|
return diva * b + moda * divb + moda * modb / d;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function multiplies 2 u64 values and produces a u128 value;
|
||||||
|
[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) {
|
||||||
|
u128 result;
|
||||||
|
#ifdef HAS_INTRINSICS
|
||||||
|
result[0] = _umul128(a, b, &result[1]);
|
||||||
|
#else
|
||||||
|
unsigned __int128 tmp = a;
|
||||||
|
tmp *= b;
|
||||||
|
std::memcpy(&result, &tmp, sizeof(u128));
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns (numerator * 2^64) / divisor truncated to 64 bits: a fixed-point
// factor whose product with a value, taken through MultiplyHigh, scales that
// value by numerator / divisor. `divisor` must be non-zero.
[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
#ifdef __SIZEOF_INT128__
    const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
    return static_cast<u64>(base / divisor);
#elif defined(_M_X64) || defined(_M_ARM64)
    // 128-bit dividend laid out as {low, high} = {0, numerator}.
    std::array<u64, 2> r = {0, numerator};
    u64 remainder;
    return _udiv128(r[1], r[0], divisor, &remainder);
#else
    // This one is bit more inaccurate: it scales by 2^64 - 1 instead of 2^64.
    // ~u64{0} is the maximum u64 value; spelling it this way avoids relying
    // on <limits>, which this header does not include.
    return MultiplyAndDivide64(~u64{0}, numerator, divisor);
#endif
}
|
||||||
|
|
||||||
|
// Returns the upper 64 bits of the 128-bit product a * b.
[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) {
#ifdef __SIZEOF_INT128__
    const unsigned __int128 wide_a = a;
    const unsigned __int128 wide_b = b;
    return (wide_a * wide_b) >> 64;
#elif defined(_M_X64) || defined(_M_ARM64)
    return __umulh(a, b); // MSVC
#else
    // Generic fallback: schoolbook multiplication on 32-bit halves.
    const u64 a_low = static_cast<u32>(a);
    const u64 a_high = a >> 32;
    const u64 b_low = static_cast<u32>(b);
    const u64 b_high = b >> 32;

    const u64 high_high = a_high * b_high;
    const u64 high_low = a_high * b_low;
    const u64 low_high = b_high * a_low;
    const u64 low_low = a_low * b_low;

    // Carry out of bits 32..63: low halves of both cross terms plus the
    // high half of the low*low term.
    const u64 middle_sum = static_cast<u64>(static_cast<u32>(high_low)) +
                           static_cast<u64>(static_cast<u32>(low_high)) + (low_low >> 32);
    const u64 carry = middle_sum >> 32;

    return high_high + (high_low >> 32) + (low_high >> 32) + carry;
#endif
}
|
||||||
|
|
||||||
|
// This function divides a u128 by a u32 value and produces two u64 values:
|
||||||
|
// the result of division and the remainder
|
||||||
|
[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
|
||||||
|
u64 remainder = dividend[0] % divisor;
|
||||||
|
u64 accum = dividend[0] / divisor;
|
||||||
|
if (dividend[1] == 0)
|
||||||
|
return {accum, remainder};
|
||||||
|
// We ignore dividend[1] / divisor as that overflows
|
||||||
|
const u64 first_segment = (dividend[1] % divisor) << 32;
|
||||||
|
accum += (first_segment / divisor) << 32;
|
||||||
|
const u64 second_segment = (first_segment % divisor) << 32;
|
||||||
|
accum += (second_segment / divisor);
|
||||||
|
remainder += second_segment % divisor;
|
||||||
|
if (remainder >= divisor) {
|
||||||
|
accum++;
|
||||||
|
remainder -= divisor;
|
||||||
|
}
|
||||||
|
return {accum, remainder};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -1,5 +1,5 @@
|
||||||
#include "gpu_memory.h"
|
#include "gpu_memory.h"
|
||||||
|
#include <atomic>
|
||||||
#include <xxh3.h>
|
#include <xxh3.h>
|
||||||
|
|
||||||
#include "common/singleton.h"
|
#include "common/singleton.h"
|
||||||
|
|
|
@ -2,8 +2,9 @@
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <vulkan/vulkan_core.h>
|
#include <vulkan/vulkan.h>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
namespace HLE::Libs::Graphics {
|
namespace HLE::Libs::Graphics {
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
#include "common/timer.h"
|
#include "core/hle/kernel/Objects/event_queue.h"
|
||||||
#include "core/hle/kernel/objects/event_queue.h"
|
|
||||||
|
|
||||||
namespace Core::Kernel {
|
namespace Core::Kernel {
|
||||||
|
|
||||||
|
@ -24,28 +23,19 @@ int EqueueInternal::addEvent(const EqueueEvent& event) {
|
||||||
|
|
||||||
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
|
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
|
||||||
std::unique_lock lock{m_mutex};
|
std::unique_lock lock{m_mutex};
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
u32 timeElapsed = 0;
|
const auto predicate = [&] {
|
||||||
Common::Timer t;
|
ret = getTriggeredEvents(ev, num);
|
||||||
t.Start();
|
return ret > 0;
|
||||||
|
};
|
||||||
for (;;) {
|
|
||||||
int ret = getTriggeredEvents(ev, num);
|
|
||||||
|
|
||||||
if (ret > 0 || (timeElapsed >= micros && micros != 0)) {
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (micros == 0) {
|
if (micros == 0) {
|
||||||
m_cond.wait(lock);
|
m_cond.wait(lock, predicate);
|
||||||
} else {
|
} else {
|
||||||
m_cond.wait_for(lock, std::chrono::microseconds(micros - timeElapsed));
|
m_cond.wait_for(lock, std::chrono::microseconds(micros), predicate);
|
||||||
}
|
}
|
||||||
|
return ret;
|
||||||
timeElapsed = static_cast<uint32_t>(t.GetTimeSec() * 1000000.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) {
|
bool EqueueInternal::triggerEvent(u64 ident, s16 filter, void* trigger_data) {
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <condition_variable>
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
namespace Core::Kernel {
|
namespace Core::Kernel {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#include "core/hle/kernel/objects/physical_memory.h"
|
#include "core/hle/kernel/Objects/physical_memory.h"
|
||||||
|
|
||||||
namespace Core::Kernel {
|
namespace Core::Kernel {
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "core/hle/kernel/objects/event_queue.h"
|
#include "core/hle/kernel/Objects/event_queue.h"
|
||||||
|
|
||||||
namespace Core::Kernel {
|
namespace Core::Kernel {
|
||||||
|
|
||||||
|
|
|
@ -4,35 +4,35 @@
|
||||||
namespace Core::Libraries::LibC {
|
namespace Core::Libraries::LibC {
|
||||||
|
|
||||||
float PS4_SYSV_ABI ps4_atan2f(float y, float x) {
|
float PS4_SYSV_ABI ps4_atan2f(float y, float x) {
|
||||||
return std::atan2f(y, x);
|
return atan2f(y, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
float PS4_SYSV_ABI ps4_acosf(float num) {
|
float PS4_SYSV_ABI ps4_acosf(float num) {
|
||||||
return std::acosf(num);
|
return acosf(num);
|
||||||
}
|
}
|
||||||
|
|
||||||
float PS4_SYSV_ABI ps4_tanf(float num) {
|
float PS4_SYSV_ABI ps4_tanf(float num) {
|
||||||
return std::tanf(num);
|
return tanf(num);
|
||||||
}
|
}
|
||||||
|
|
||||||
float PS4_SYSV_ABI ps4_asinf(float num) {
|
float PS4_SYSV_ABI ps4_asinf(float num) {
|
||||||
return std::asinf(num);
|
return asinf(num);
|
||||||
}
|
}
|
||||||
|
|
||||||
double PS4_SYSV_ABI ps4_pow(double base, double exponent) {
|
double PS4_SYSV_ABI ps4_pow(double base, double exponent) {
|
||||||
return std::pow(base, exponent);
|
return pow(base, exponent);
|
||||||
}
|
}
|
||||||
|
|
||||||
double PS4_SYSV_ABI ps4__Sin(double x) {
|
double PS4_SYSV_ABI ps4__Sin(double x) {
|
||||||
return std::sin(x);
|
return sin(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
float PS4_SYSV_ABI ps4__Fsin(float arg) {
|
float PS4_SYSV_ABI ps4__Fsin(float arg) {
|
||||||
return std::sinf(arg);
|
return sinf(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
double PS4_SYSV_ABI ps4_exp2(double arg) {
|
double PS4_SYSV_ABI ps4_exp2(double arg) {
|
||||||
return std::exp2(arg);
|
return exp2(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Core::Libraries::LibC
|
} // namespace Core::Libraries::LibC
|
||||||
|
|
|
@ -12,7 +12,7 @@ int PS4_SYSV_ABI ps4_printf(VA_ARGS) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI ps4_fprintf(FILE* file, VA_ARGS) {
|
int PS4_SYSV_ABI ps4_fprintf(FILE* file, VA_ARGS) {
|
||||||
int fd = _fileno(file);
|
int fd = fileno(file);
|
||||||
if (fd == 1 || fd == 2) { // output stdout and stderr to console
|
if (fd == 1 || fd == 2) { // output stdout and stderr to console
|
||||||
VA_CTX(ctx);
|
VA_CTX(ctx);
|
||||||
return printf_ctx(&ctx);
|
return printf_ctx(&ctx);
|
||||||
|
|
|
@ -15,6 +15,8 @@
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
|
#else
|
||||||
|
#include <sys/mman.h>
|
||||||
#endif
|
#endif
|
||||||
#include "thread_management.h"
|
#include "thread_management.h"
|
||||||
|
|
||||||
|
@ -56,6 +58,7 @@ int* PS4_SYSV_ABI __Error() { return &libc_error; }
|
||||||
#define PROT_WRITE 0x2
|
#define PROT_WRITE 0x2
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, off_t offset, void** res) {
|
int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, off_t offset, void** res) {
|
||||||
|
#ifdef _WIN64
|
||||||
PRINT_FUNCTION_NAME();
|
PRINT_FUNCTION_NAME();
|
||||||
if (prot > 3) // READ,WRITE or bitwise READ | WRITE supported
|
if (prot > 3) // READ,WRITE or bitwise READ | WRITE supported
|
||||||
{
|
{
|
||||||
|
@ -86,6 +89,14 @@ int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd,
|
||||||
}
|
}
|
||||||
*res = ret;
|
*res = ret;
|
||||||
return 0;
|
return 0;
|
||||||
|
#else
|
||||||
|
void* result = mmap(addr, len, prot, flags, fd, offset);
|
||||||
|
if (result != MAP_FAILED) {
|
||||||
|
*res = result;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
std::abort();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
PS4_SYSV_ABI void* posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) {
|
PS4_SYSV_ABI void* posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) {
|
||||||
|
|
|
@ -1,27 +1,31 @@
|
||||||
#include "common/timer.h"
|
#include "common/native_clock.h"
|
||||||
#include "core/hle/libraries/libkernel/time_management.h"
|
#include "core/hle/libraries/libkernel/time_management.h"
|
||||||
#include "core/hle/libraries/libs.h"
|
#include "core/hle/libraries/libs.h"
|
||||||
#include "emuTimer.h"
|
|
||||||
|
|
||||||
namespace Core::Libraries::LibKernel {
|
namespace Core::Libraries::LibKernel {
|
||||||
|
|
||||||
|
static u64 initial_ptc;
|
||||||
|
static std::unique_ptr<Common::NativeClock> clock;
|
||||||
|
|
||||||
u64 PS4_SYSV_ABI sceKernelGetProcessTime() {
|
u64 PS4_SYSV_ABI sceKernelGetProcessTime() {
|
||||||
return static_cast<u64>(Emulator::emuTimer::getTimeMsec() * 1000.0); // return time in microseconds
|
return clock->GetProcessTimeUS();
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter() {
|
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter() {
|
||||||
return Emulator::emuTimer::getTimeCounter();
|
return clock->GetUptime() - initial_ptc;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency() {
|
u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency() {
|
||||||
return Emulator::emuTimer::getTimeFrequency();
|
return clock->GetTscFrequency();
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 PS4_SYSV_ABI sceKernelReadTsc() {
|
u64 PS4_SYSV_ABI sceKernelReadTsc() {
|
||||||
return Common::Timer::getQueryPerformanceCounter();
|
return clock->GetUptime();
|
||||||
}
|
}
|
||||||
|
|
||||||
void timeSymbolsRegister(Loader::SymbolsResolver* sym) {
|
void timeSymbolsRegister(Loader::SymbolsResolver* sym) {
|
||||||
|
clock = std::make_unique<Common::NativeClock>();
|
||||||
|
initial_ptc = clock->GetUptime();
|
||||||
LIB_FUNCTION("4J2sUJmuHZQ", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTime);
|
LIB_FUNCTION("4J2sUJmuHZQ", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTime);
|
||||||
LIB_FUNCTION("fgxnMeTNUtY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounter);
|
LIB_FUNCTION("fgxnMeTNUtY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounter);
|
||||||
LIB_FUNCTION("BNowx2l588E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounterFrequency);
|
LIB_FUNCTION("BNowx2l588E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounterFrequency);
|
||||||
|
|
|
@ -658,12 +658,12 @@ void Linker::Resolve(const std::string& name, int Symtype, Module* m, Loader::Sy
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
__debugbreak();//den tha prepei na ftasoume edo
|
//__debugbreak();//den tha prepei na ftasoume edo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
__debugbreak();//oute edo mallon
|
//__debugbreak();//oute edo mallon
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
namespace Core::Loader {
|
namespace Core::Loader {
|
||||||
|
|
||||||
constexpr bool log_file_loader = true; // disable it to disable logging
|
constexpr bool log_file_loader = false; // disable it to disable logging
|
||||||
|
|
||||||
static std::string_view getProgramTypeName(program_type_es type) {
|
static std::string_view getProgramTypeName(program_type_es type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
|
|
@ -79,7 +79,8 @@ bool memory_protect(u64 address, u64 size, MemoryMode mode, MemoryMode* old_mode
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
#else
|
#else
|
||||||
#error Unimplement memory_protect function
|
int ret = mprotect(reinterpret_cast<void*>(address), size, convertMemoryMode(mode));
|
||||||
|
return true;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -117,6 +118,7 @@ bool memory_patch(u64 vaddr, u64 value) {
|
||||||
static u64 AlignUp(u64 pos, u64 align) { return (align != 0 ? (pos + (align - 1)) & ~(align - 1) : pos); }
|
static u64 AlignUp(u64 pos, u64 align) { return (align != 0 ? (pos + (align - 1)) & ~(align - 1) : pos); }
|
||||||
|
|
||||||
u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment) {
|
u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment) {
|
||||||
|
#ifdef _WIN64
|
||||||
// try allocate aligned address inside user area
|
// try allocate aligned address inside user area
|
||||||
MEM_ADDRESS_REQUIREMENTS req{};
|
MEM_ADDRESS_REQUIREMENTS req{};
|
||||||
MEM_EXTENDED_PARAMETER param{};
|
MEM_EXTENDED_PARAMETER param{};
|
||||||
|
@ -134,5 +136,13 @@ u64 memory_alloc_aligned(u64 address, u64 size, MemoryMode mode, u64 alignment)
|
||||||
LOG_ERROR_IF(true, "VirtualAlloc2() failed: 0x{:X}\n", err);
|
LOG_ERROR_IF(true, "VirtualAlloc2() failed: 0x{:X}\n", err);
|
||||||
}
|
}
|
||||||
return ptr;
|
return ptr;
|
||||||
|
#else
|
||||||
|
void* hint_address = reinterpret_cast<void*>(AlignUp(address, alignment));
|
||||||
|
void* ptr = mmap(hint_address, size, convertMemoryMode(mode), MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||||
|
if (ptr == MAP_FAILED) {
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
return reinterpret_cast<u64>(ptr);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
} // namespace VirtualMemory
|
} // namespace VirtualMemory
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
#include "common/timer.h"
|
|
||||||
|
|
||||||
namespace Emulator::emuTimer {
|
|
||||||
|
|
||||||
static Common::Timer timer;
|
|
||||||
|
|
||||||
void start() {
|
|
||||||
timer.Start();
|
|
||||||
}
|
|
||||||
|
|
||||||
double getTimeMsec() {
|
|
||||||
return timer.GetTimeMsec();
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 getTimeCounter() {
|
|
||||||
return timer.GetTicks();
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 getTimeFrequency() {
|
|
||||||
return timer.GetFrequency();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Emulator::emuTimer
|
|
|
@ -1,10 +0,0 @@
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "common/types.h"
|
|
||||||
|
|
||||||
namespace Emulator::emuTimer {
|
|
||||||
void start();
|
|
||||||
double getTimeMsec();
|
|
||||||
u64 getTimeCounter();
|
|
||||||
u64 getTimeFrequency();
|
|
||||||
} // namespace Emulator::emuTimer
|
|
|
@ -1,6 +1,5 @@
|
||||||
#include <fmt/core.h>
|
#include <fmt/core.h>
|
||||||
#include <vulkan_util.h>
|
#include <vulkan_util.h>
|
||||||
#include "common/timer.h"
|
|
||||||
#include "common/singleton.h"
|
#include "common/singleton.h"
|
||||||
#include "common/version.h"
|
#include "common/version.h"
|
||||||
#include "emulator.h"
|
#include "emulator.h"
|
||||||
|
@ -90,8 +89,6 @@ static void calculateFps(double game_time_s) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void emuRun() {
|
void emuRun() {
|
||||||
Common::Timer timer;
|
|
||||||
timer.Start();
|
|
||||||
auto window_ctx = Common::Singleton<Emu::WindowCtx>::Instance();
|
auto window_ctx = Common::Singleton<Emu::WindowCtx>::Instance();
|
||||||
{
|
{
|
||||||
// init window and wait until init finishes
|
// init window and wait until init finishes
|
||||||
|
@ -100,7 +97,7 @@ void emuRun() {
|
||||||
Graphics::Vulkan::vulkanCreate(window_ctx);
|
Graphics::Vulkan::vulkanCreate(window_ctx);
|
||||||
window_ctx->m_is_graphic_initialized = true;
|
window_ctx->m_is_graphic_initialized = true;
|
||||||
window_ctx->m_graphic_initialized_cond.notify_one();
|
window_ctx->m_graphic_initialized_cond.notify_one();
|
||||||
calculateFps(timer.GetTimeSec());
|
calculateFps(0); // TODO: Proper fps
|
||||||
}
|
}
|
||||||
|
|
||||||
bool exit_loop = false;
|
bool exit_loop = false;
|
||||||
|
@ -138,10 +135,6 @@ void emuRun() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (m_game_is_paused) {
|
if (m_game_is_paused) {
|
||||||
if (!timer.IsPaused()) {
|
|
||||||
timer.Pause();
|
|
||||||
}
|
|
||||||
|
|
||||||
SDL_WaitEvent(&event);
|
SDL_WaitEvent(&event);
|
||||||
|
|
||||||
switch (event.type) {
|
switch (event.type) {
|
||||||
|
@ -171,21 +164,13 @@ void emuRun() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
exit_loop = m_emu_needs_exit;
|
exit_loop = m_emu_needs_exit;
|
||||||
if (m_game_is_paused) {
|
if (!m_game_is_paused) {
|
||||||
if (!timer.IsPaused()) {
|
|
||||||
timer.Pause();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (timer.IsPaused()) {
|
|
||||||
timer.Resume();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!exit_loop) {
|
if (!exit_loop) {
|
||||||
update();
|
update();
|
||||||
}
|
}
|
||||||
if (!exit_loop) {
|
if (!exit_loop) {
|
||||||
if (HLE::Libs::Graphics::VideoOut::videoOutFlip(100000)) { // flip every 0.1 sec
|
if (HLE::Libs::Graphics::VideoOut::videoOutFlip(100000)) { // flip every 0.1 sec
|
||||||
calculateFps(timer.GetTimeSec());
|
calculateFps(0); // TODO: Proper fps
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -215,7 +200,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
|
||||||
window_ctx->swapchain.current_index = static_cast<u32>(-1);
|
window_ctx->swapchain.current_index = static_cast<u32>(-1);
|
||||||
|
|
||||||
auto result = vkAcquireNextImageKHR(window_ctx->m_graphic_ctx.m_device, window_ctx->swapchain.swapchain, UINT64_MAX, nullptr,
|
auto result = vkAcquireNextImageKHR(window_ctx->m_graphic_ctx.m_device, window_ctx->swapchain.swapchain, UINT64_MAX, nullptr,
|
||||||
window_ctx->swapchain.present_complete_fence, &window_ctx->swapchain.current_index);
|
VK_NULL_HANDLE, &window_ctx->swapchain.current_index);
|
||||||
|
|
||||||
if (result != VK_SUCCESS) {
|
if (result != VK_SUCCESS) {
|
||||||
fmt::print("Can't aquireNextImage\n");
|
fmt::print("Can't aquireNextImage\n");
|
||||||
|
@ -226,16 +211,6 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
|
||||||
std::exit(0);
|
std::exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
|
||||||
result = vkWaitForFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence, VK_TRUE, 100000000);
|
|
||||||
} while (result == VK_TIMEOUT);
|
|
||||||
if (result != VK_SUCCESS) {
|
|
||||||
fmt::print("vkWaitForFences is not success\n");
|
|
||||||
std::exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
vkResetFences(window_ctx->m_graphic_ctx.m_device, 1, &window_ctx->swapchain.present_complete_fence);
|
|
||||||
|
|
||||||
auto blt_src_image = image;
|
auto blt_src_image = image;
|
||||||
auto blt_dst_image = window_ctx->swapchain;
|
auto blt_dst_image = window_ctx->swapchain;
|
||||||
|
|
||||||
|
@ -272,6 +247,7 @@ void DrawBuffer(HLE::Libs::Graphics::VideoOutVulkanImage* image) {
|
||||||
|
|
||||||
buffer.end();
|
buffer.end();
|
||||||
buffer.executeWithSemaphore();
|
buffer.executeWithSemaphore();
|
||||||
|
buffer.waitForFence(); // HACK: The whole vulkan backend needs a rewrite
|
||||||
|
|
||||||
VkPresentInfoKHR present{};
|
VkPresentInfoKHR present{};
|
||||||
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
||||||
|
|
|
@ -14,7 +14,6 @@
|
||||||
#include "core/PS4/HLE/Graphics/video_out.h"
|
#include "core/PS4/HLE/Graphics/video_out.h"
|
||||||
#include "core/hle/libraries/libs.h"
|
#include "core/hle/libraries/libs.h"
|
||||||
#include "core/linker.h"
|
#include "core/linker.h"
|
||||||
#include "emuTimer.h"
|
|
||||||
#include "emulator.h"
|
#include "emulator.h"
|
||||||
#include <core/hle/libraries/libkernel/thread_management.h>
|
#include <core/hle/libraries/libkernel/thread_management.h>
|
||||||
#include "core/file_sys/fs.h"
|
#include "core/file_sys/fs.h"
|
||||||
|
@ -31,7 +30,6 @@ int main(int argc, char* argv[]) {
|
||||||
auto height = Config::getScreenHeight();
|
auto height = Config::getScreenHeight();
|
||||||
Emu::emuInit(width, height);
|
Emu::emuInit(width, height);
|
||||||
HLE::Libs::Graphics::VideoOut::videoOutInit(width, height);
|
HLE::Libs::Graphics::VideoOut::videoOutInit(width, height);
|
||||||
Emulator::emuTimer::start();
|
|
||||||
|
|
||||||
// Argument 1 is the path of self file to boot
|
// Argument 1 is the path of self file to boot
|
||||||
const char* const path = argv[1];
|
const char* const path = argv[1];
|
||||||
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
#include "gpu_memory.h"
|
||||||
|
#include <atomic>
|
||||||
|
#include <xxh3.h>
|
||||||
|
|
||||||
|
#include "common/singleton.h"
|
||||||
|
|
||||||
|
/// Single-range convenience overload: forwards to GPUMemory::memoryCreateObj on
/// the GPUMemory singleton, passing the address and size as one-element arrays.
///
/// `todo` is not used here; nullptr is forwarded in its place.
/// Returns whatever the GPUMemory member function returns.
void* GPU::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo /*CommandBuffer?*/,
                           u64 virtual_addr, u64 size, const GPUObject& info) {
    constexpr int range_count = 1;
    return Common::Singleton<GPUMemory>::Instance()->memoryCreateObj(submit_id, ctx, nullptr, &virtual_addr, &size,
                                                                     range_count, info);
}
|
||||||
|
|
||||||
|
/// Appends a new heap covering `size` bytes starting at `virtual_addr` to the
/// GPUMemory singleton's heap list. The list is modified under the singleton's mutex.
void GPU::memorySetAllocArea(u64 virtual_addr, u64 size) {
    auto* const manager = Common::Singleton<GPUMemory>::Instance();
    std::scoped_lock guard{manager->m_mutex};

    MemoryHeap area;
    area.allocated_size = size;
    area.allocated_virtual_addr = virtual_addr;
    manager->m_heaps.push_back(area);
}
|
||||||
|
|
||||||
|
/// Returns the 64-bit XXH3 hash of the `size` bytes at `buf`,
/// or 0 when `buf` is null or `size` is zero.
u64 GPU::calculate_hash(const u08* buf, u64 size) {
    if (buf == nullptr || size == 0) {
        return 0;
    }
    return XXH3_64bits(buf, size);
}
|
||||||
|
|
||||||
|
/// Selects a device memory type compatible with `mem` and allocates
/// `mem->requirements.size` bytes from it into `mem->memory`.
///
/// A memory type qualifies when its bit is set in `mem->requirements.memoryTypeBits`
/// and its property flags contain every flag in `mem->property`; the first
/// qualifying type is used. Once a type is found, `mem->type` is set to its index,
/// `mem->offset` to 0 and `mem->unique_id` to a freshly incremented counter value
/// before vkAllocateMemory is called.
///
/// @return true if vkAllocateMemory returned VK_SUCCESS; false if it failed, or if
///         no memory type qualified (in that case `mem` is left unmodified).
bool GPU::vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem) {
    static std::atomic_uint64_t unique_id = 0;

    VkPhysicalDeviceMemoryProperties memory_properties{};
    vkGetPhysicalDeviceMemoryProperties(ctx->m_physical_device, &memory_properties);

    u32 index = 0;
    for (; index < memory_properties.memoryTypeCount; index++) {
        const bool type_allowed = (mem->requirements.memoryTypeBits & (static_cast<uint32_t>(1) << index)) != 0;
        const bool has_properties = (memory_properties.memoryTypes[index].propertyFlags & mem->property) == mem->property;
        if (type_allowed && has_properties) {
            break;
        }
    }

    // The loop ran off the end: nothing matched. Passing memoryTypeCount as
    // memoryTypeIndex would be invalid, so report failure instead of allocating.
    if (index == memory_properties.memoryTypeCount) {
        return false;
    }

    mem->type = index;
    mem->offset = 0;

    VkMemoryAllocateInfo alloc_info{};
    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    alloc_info.pNext = nullptr;
    alloc_info.allocationSize = mem->requirements.size;
    alloc_info.memoryTypeIndex = index;

    mem->unique_id = ++unique_id;

    return vkAllocateMemory(ctx->m_device, &alloc_info, nullptr, &mem->memory) == VK_SUCCESS;
}
|
||||||
|
|
||||||
|
/// Asks the GPUMemory singleton to flush every heap it tracks.
void GPU::flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx) {
    Common::Singleton<GPUMemory>::Instance()->flushAllHeaps(ctx);
}
|
||||||
|
|
||||||
|
/// Finds the first heap that contains either the first or the last byte of the
/// range [virtual_addr, virtual_addr + size).
///
/// @return index of that heap in m_heaps, or -1 if neither endpoint lies in any heap.
int GPU::GPUMemory::getHeapId(u64 virtual_addr, u64 size) {
    const u64 last_addr = virtual_addr + size - 1;

    const int heap_count = static_cast<int>(m_heaps.size());
    for (int id = 0; id < heap_count; ++id) {
        const u64 heap_begin = m_heaps[id].allocated_virtual_addr;
        const u64 heap_end = heap_begin + m_heaps[id].allocated_size;

        const bool first_inside = virtual_addr >= heap_begin && virtual_addr < heap_end;
        const bool last_inside = last_addr >= heap_begin && last_addr < heap_end;
        if (first_inside || last_inside) {
            return id;
        }
    }
    return -1;
}
|
||||||
|
|
||||||
|
/// Creates a GPU object for the given address ranges and records it in the heap
/// that contains the first range.
///
/// The heap is looked up from virtual_addr[0] / size[0] only. The new record copies
/// the eight object parameters and the type from `info`, stores one hash per range
/// (0 when `info.check_hash` is false), and keeps the update callback from `info`.
/// The object itself is produced by the create callback obtained from `info`.
/// `todo` is not used.
///
/// @return the object returned by the create callback, or nullptr when no heap
///         contains the first range.
// NOTE(review): virtual_addr_num is not range-checked before indexing the hash
// array — confirm callers never pass more ranges than it can hold.
void* GPU::GPUMemory::memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, void* todo,
                                      const u64* virtual_addr, const u64* size, int virtual_addr_num,
                                      const GPUObject& info) {
    auto* const manager = Common::Singleton<GPUMemory>::Instance();
    std::scoped_lock guard{manager->m_mutex};

    const int heap_id = manager->getHeapId(virtual_addr[0], size[0]);
    if (heap_id < 0) {
        return nullptr;
    }
    auto& heap = m_heaps[heap_id];

    ObjInfo record = {};
    record.submit_id = submit_id;
    record.check_hash = info.check_hash;
    record.gpu_object.objectType = info.objectType;
    record.gpu_object.obj = nullptr;

    for (int p = 0; p < 8; ++p) {
        record.obj_params[p] = info.obj_params[p];
    }

    for (int r = 0; r < virtual_addr_num; ++r) {
        record.hash[r] =
            info.check_hash ? GPU::calculate_hash(reinterpret_cast<const u08*>(virtual_addr[r]), size[r]) : 0;
    }

    record.gpu_object.obj =
        info.getCreateFunc()(ctx, record.obj_params, virtual_addr, size, virtual_addr_num, &record.mem);
    record.update_func = info.getUpdateFunc();

    // The new entry's id is its position in the heap's object list.
    const int object_id = static_cast<int>(heap.objects.size());

    HeapObject entry{};
    entry.block = createHeapBlock(virtual_addr, size, virtual_addr_num, heap_id, object_id);
    entry.info = record;
    entry.free = false;
    heap.objects.push_back(entry);

    return record.gpu_object.obj;
}
|
||||||
|
|
||||||
|
/// Builds a HeapBlock holding copies of the first `virtual_addr_num` entries of
/// `virtual_addr` and `size`. `heap_id` and `obj_id` do not influence the result.
GPU::HeapBlock GPU::GPUMemory::createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num,
                                               int heap_id, int obj_id) {
    GPU::HeapBlock block{};
    block.virtual_addr_num = virtual_addr_num;

    for (int range = 0; range < virtual_addr_num; ++range) {
        block.size[range] = size[range];
        block.virtual_addr[range] = virtual_addr[range];
    }
    return block;
}
|
||||||
|
|
||||||
|
/// Re-hashes the ranges of one heap object and invokes its update callback if any
/// hash changed.
///
/// Nothing happens unless `submit_id` is greater than the submit id stored on the
/// object. Otherwise each range is hashed (0 when hashing is disabled for the
/// object), changed hashes are stored, and the stored submit id is replaced by
/// `submit_id` unless it is UINT64_MAX.
void GPU::GPUMemory::update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id) {
    auto& entry = m_heaps[heap_id].objects[obj_id];
    auto& info = entry.info;
    auto& block = entry.block;

    if (submit_id <= info.submit_id) {
        return;
    }

    bool changed = false;
    for (int i = 0; i < block.virtual_addr_num; i++) {
        const u64 fresh = info.check_hash
                              ? GPU::calculate_hash(reinterpret_cast<const uint8_t*>(block.virtual_addr[i]), block.size[i])
                              : 0;
        if (info.hash[i] != fresh) {
            info.hash[i] = fresh;
            changed = true;
        }
    }

    if (submit_id != UINT64_MAX) {
        info.submit_id = submit_id;
    }

    if (changed) {
        info.update_func(ctx, info.obj_params, info.gpu_object.obj, block.virtual_addr, block.size,
                         block.virtual_addr_num);
    }
}
|
||||||
|
|
||||||
|
/// Calls update() with a submit id of UINT64_MAX on every object, in every heap,
/// that is not marked free. The whole walk runs under m_mutex.
void GPU::GPUMemory::flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx) {
    std::scoped_lock guard{m_mutex};

    const int heap_count = static_cast<int>(m_heaps.size());
    for (int heap_id = 0; heap_id < heap_count; ++heap_id) {
        const int object_count = static_cast<int>(m_heaps[heap_id].objects.size());
        for (int obj_id = 0; obj_id < object_count; ++obj_id) {
            if (m_heaps[heap_id].objects[obj_id].free) {
                continue;
            }
            update(UINT64_MAX, ctx, heap_id, obj_id);
        }
    }
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/types.h"
#include <array>
#include <mutex>
#include <vector>
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
class GPUObject;
|
||||||
|
|
||||||
|
enum class MemoryMode : u32 {
|
||||||
|
NoAccess = 0,
|
||||||
|
Read = 1,
|
||||||
|
Write = 2,
|
||||||
|
ReadWrite = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class MemoryObjectType : u64 {
|
||||||
|
Invalid,
|
||||||
|
VideoOutBuffer,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GpuMemoryObject {
|
||||||
|
MemoryObjectType object_type = MemoryObjectType::Invalid;
|
||||||
|
void* obj = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct HeapBlock {
|
||||||
|
std::array<u64, 3> virtual_address{};
|
||||||
|
std::array<u64, 3> size{};
|
||||||
|
u32 virtual_addr_num = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class GPUObject {
|
||||||
|
public:
|
||||||
|
GPUObject() = default;
|
||||||
|
virtual ~GPUObject() = default;
|
||||||
|
u64 obj_params[8] = {};
|
||||||
|
bool check_hash = false;
|
||||||
|
bool isReadOnly = false;
|
||||||
|
MemoryObjectType objectType = MemoryObjectType::Invalid;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ObjInfo {
|
||||||
|
std::array<u64, 8> obj_params{};
|
||||||
|
GpuMemoryObject gpu_object;
|
||||||
|
std::array<u64, 3> hash{};
|
||||||
|
u64 submit_id = 0;
|
||||||
|
bool check_hash = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct HeapObject {
|
||||||
|
HeapBlock block;
|
||||||
|
ObjInfo info;
|
||||||
|
bool free = true;
|
||||||
|
};
|
||||||
|
struct MemoryHeap {
|
||||||
|
u64 allocated_virtual_addr = 0;
|
||||||
|
u64 allocated_size = 0;
|
||||||
|
std::vector<HeapObject> objects;
|
||||||
|
};
|
||||||
|
|
||||||
|
class GPUMemory {
|
||||||
|
public:
|
||||||
|
GPUMemory() {}
|
||||||
|
virtual ~GPUMemory() {}
|
||||||
|
int getHeapId(u64 vaddr, u64 size);
|
||||||
|
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, const u64* virtual_addr,
|
||||||
|
const u64* size, int virtual_addr_num, const GPUObject& info);
|
||||||
|
HeapBlock createHeapBlock(const u64* virtual_addr, const u64* size, int virtual_addr_num, int heap_id, int obj_id);
|
||||||
|
void update(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, int heap_id, int obj_id);
|
||||||
|
void flushAllHeaps(HLE::Libs::Graphics::GraphicCtx* ctx);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::mutex m_mutex;
|
||||||
|
std::vector<MemoryHeap> m_heaps;
|
||||||
|
};
|
||||||
|
|
||||||
|
void memorySetAllocArea(u64 virtual_addr, u64 size);
|
||||||
|
void* memoryCreateObj(u64 submit_id, HLE::Libs::Graphics::GraphicCtx* ctx, /*CommandBuffer* buffer*/ void* todo, u64 virtual_addr, u64 size,
|
||||||
|
const GPUObject& info);
|
||||||
|
u64 calculate_hash(const u08* buf, u64 size);
|
||||||
|
bool vulkanAllocateMemory(HLE::Libs::Graphics::GraphicCtx* ctx, HLE::Libs::Graphics::VulkanMemory* mem);
|
||||||
|
void flushGarlic(HLE::Libs::Graphics::GraphicCtx* ctx);
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -0,0 +1,151 @@
|
||||||
|
#include <bit>
|
||||||
|
#include <cstring>
|
||||||
|
#include "video_core/tile_manager.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
class TileManager32 {
|
||||||
|
public:
|
||||||
|
u32 m_macro_tile_height = 0;
|
||||||
|
u32 m_bank_height = 0;
|
||||||
|
u32 m_num_banks = 0;
|
||||||
|
u32 m_num_pipes = 0;
|
||||||
|
u32 m_padded_width = 0;
|
||||||
|
u32 m_padded_height = 0;
|
||||||
|
u32 m_pipe_bits = 0;
|
||||||
|
u32 m_bank_bits = 0;
|
||||||
|
|
||||||
|
TileManager32(u32 width, u32 height, bool is_neo) {
|
||||||
|
m_macro_tile_height = (is_neo ? 128 : 64);
|
||||||
|
m_bank_height = is_neo ? 2 : 1;
|
||||||
|
m_num_banks = is_neo ? 8 : 16;
|
||||||
|
m_num_pipes = is_neo ? 16 : 8;
|
||||||
|
m_padded_width = width;
|
||||||
|
if (height == 1080) {
|
||||||
|
m_padded_height = is_neo ? 1152 : 1088;
|
||||||
|
}
|
||||||
|
if (height == 720) {
|
||||||
|
m_padded_height = 768;
|
||||||
|
}
|
||||||
|
m_pipe_bits = is_neo ? 4 : 3;
|
||||||
|
m_bank_bits = is_neo ? 3 : 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 GetElementIndex(u32 x, u32 y) {
|
||||||
|
u32 elem = 0;
|
||||||
|
elem |= ((x >> 0u) & 0x1u) << 0u;
|
||||||
|
elem |= ((x >> 1u) & 0x1u) << 1u;
|
||||||
|
elem |= ((y >> 0u) & 0x1u) << 2u;
|
||||||
|
elem |= ((x >> 2u) & 0x1u) << 3u;
|
||||||
|
elem |= ((y >> 1u) & 0x1u) << 4u;
|
||||||
|
elem |= ((y >> 2u) & 0x1u) << 5u;
|
||||||
|
|
||||||
|
return elem;
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 GetPipeIndex(u32 x, u32 y, bool is_neo) {
|
||||||
|
u32 pipe = 0;
|
||||||
|
|
||||||
|
if (!is_neo) {
|
||||||
|
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
|
||||||
|
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
|
||||||
|
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
|
||||||
|
} else {
|
||||||
|
pipe |= (((x >> 3u) ^ (y >> 3u) ^ (x >> 4u)) & 0x1u) << 0u;
|
||||||
|
pipe |= (((x >> 4u) ^ (y >> 4u)) & 0x1u) << 1u;
|
||||||
|
pipe |= (((x >> 5u) ^ (y >> 5u)) & 0x1u) << 2u;
|
||||||
|
pipe |= (((x >> 6u) ^ (y >> 5u)) & 0x1u) << 3u;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pipe;
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 GetBankIndex(u32 x, u32 y, u32 bank_width, u32 bank_height, u32 num_banks, u32 num_pipes) {
|
||||||
|
const u32 x_shift_offset = std::bit_width(bank_width * num_pipes);
|
||||||
|
const u32 y_shift_offset = std::bit_width(bank_height);
|
||||||
|
const u32 xs = x >> x_shift_offset;
|
||||||
|
const u32 ys = y >> y_shift_offset;
|
||||||
|
u32 bank = 0;
|
||||||
|
switch (num_banks) {
|
||||||
|
case 8:
|
||||||
|
bank |= (((xs >> 3u) ^ (ys >> 5u)) & 0x1u) << 0u;
|
||||||
|
bank |= (((xs >> 4u) ^ (ys >> 4u) ^ (ys >> 5u)) & 0x1u) << 1u;
|
||||||
|
bank |= (((xs >> 5u) ^ (ys >> 3u)) & 0x1u) << 2u;
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
bank |= (((xs >> 3u) ^ (ys >> 6u)) & 0x1u) << 0u;
|
||||||
|
bank |= (((xs >> 4u) ^ (ys >> 5u) ^ (ys >> 6u)) & 0x1u) << 1u;
|
||||||
|
bank |= (((xs >> 5u) ^ (ys >> 4u)) & 0x1u) << 2u;
|
||||||
|
bank |= (((xs >> 6u) ^ (ys >> 3u)) & 0x1u) << 3u;
|
||||||
|
break;
|
||||||
|
default:;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bank;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 GetTiledOffset(u32 x, u32 y, bool is_neo) const {
|
||||||
|
u64 element_index = GetElementIndex(x, y);
|
||||||
|
|
||||||
|
u32 xh = x;
|
||||||
|
u32 yh = y;
|
||||||
|
u64 pipe = GetPipeIndex(xh, yh, is_neo);
|
||||||
|
u64 bank = GetBankIndex(xh, yh, 1, m_bank_height, m_num_banks, m_num_pipes);
|
||||||
|
u32 tile_bytes = (8 * 8 * 32 + 7) / 8;
|
||||||
|
u64 element_offset = (element_index * 32);
|
||||||
|
u64 tile_split_slice = 0;
|
||||||
|
|
||||||
|
if (tile_bytes > 512) {
|
||||||
|
tile_split_slice = element_offset / (static_cast<u64>(512) * 8);
|
||||||
|
element_offset %= (static_cast<u64>(512) * 8);
|
||||||
|
tile_bytes = 512;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 macro_tile_bytes = (128 / 8) * (m_macro_tile_height / 8) * tile_bytes / (m_num_pipes * m_num_banks);
|
||||||
|
u64 macro_tiles_per_row = m_padded_width / 128;
|
||||||
|
u64 macro_tile_row_index = y / m_macro_tile_height;
|
||||||
|
u64 macro_tile_column_index = x / 128;
|
||||||
|
u64 macro_tile_index = (macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index;
|
||||||
|
u64 macro_tile_offset = macro_tile_index * macro_tile_bytes;
|
||||||
|
u64 macro_tiles_per_slice = macro_tiles_per_row * (m_padded_height / m_macro_tile_height);
|
||||||
|
u64 slice_bytes = macro_tiles_per_slice * macro_tile_bytes;
|
||||||
|
u64 slice_offset = tile_split_slice * slice_bytes;
|
||||||
|
u64 tile_row_index = (y / 8) % m_bank_height;
|
||||||
|
u64 tile_index = tile_row_index;
|
||||||
|
u64 tile_offset = tile_index * tile_bytes;
|
||||||
|
|
||||||
|
u64 tile_split_slice_rotation = ((m_num_banks / 2) + 1) * tile_split_slice;
|
||||||
|
bank ^= tile_split_slice_rotation;
|
||||||
|
bank &= (m_num_banks - 1);
|
||||||
|
|
||||||
|
u64 total_offset = (slice_offset + macro_tile_offset + tile_offset) * 8 + element_offset;
|
||||||
|
u64 bit_offset = total_offset & 0x7u;
|
||||||
|
total_offset /= 8;
|
||||||
|
|
||||||
|
u64 pipe_interleave_offset = total_offset & 0xffu;
|
||||||
|
u64 offset = total_offset >> 8u;
|
||||||
|
u64 byte_offset = pipe_interleave_offset | (pipe << (8u)) | (bank << (8u + m_pipe_bits)) | (offset << (8u + m_pipe_bits + m_bank_bits));
|
||||||
|
|
||||||
|
return ((byte_offset << 3u) | bit_offset) / 8;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
void ConvertTileToLinear(u08* dst, const u08* src,u32 width, u32 height, bool is_neo) {
|
||||||
|
const TileManager32 t{width, height, is_neo};
|
||||||
|
for (u32 y = 0; y < height; y++) {
|
||||||
|
u32 x = 0;
|
||||||
|
u64 linear_offset = y * width * 4;
|
||||||
|
|
||||||
|
for (; x + 1 < width; x += 2) {
|
||||||
|
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
|
||||||
|
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u64));
|
||||||
|
linear_offset += sizeof(u64);
|
||||||
|
}
|
||||||
|
if (x < width) {
|
||||||
|
auto tiled_offset = t.GetTiledOffset(x, y, is_neo);
|
||||||
|
std::memcpy(dst + linear_offset, src + tiled_offset, sizeof(u32));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -0,0 +1,9 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
void ConvertTileToLinear(void* dst, const void* src, u32 width, u32 height, bool neo);
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -30,7 +30,6 @@ add_subdirectory(toml11 EXCLUDE_FROM_ALL)
|
||||||
|
|
||||||
# Vulkan
|
# Vulkan
|
||||||
add_subdirectory(vulkan EXCLUDE_FROM_ALL)
|
add_subdirectory(vulkan EXCLUDE_FROM_ALL)
|
||||||
target_include_directories(vulkan-1 INTERFACE vulkan/include)
|
|
||||||
|
|
||||||
# Winpthreads
|
# Winpthreads
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
@ -66,6 +65,6 @@ target_include_directories(imgui PUBLIC
|
||||||
imgui/include
|
imgui/include
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis winpthread discord-rpc)
|
target_link_libraries(imgui PRIVATE SDL3-shared ${CMAKE_DL_LIBS} Zydis discord-rpc)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 661b23edeb52d400cf5812e7330f14f05c072fab
|
Subproject commit 8e42eef4950feb5d2b76574a9cd2591dfaae2449
|
|
@ -1 +1 @@
|
||||||
Subproject commit 76dfc7e7c0d3c69d3cdaa3399b63545235ccbb02
|
Subproject commit 134f9194bb93072b72b8cfa27ac3bb30a0fb5b57
|
|
@ -1 +1 @@
|
||||||
Subproject commit 8c9feb4f480b32f7c7421af546aa6ffb558bdd5e
|
Subproject commit 72b2e740754bc6b86b724fa5b2c90dca6f69462e
|
Loading…
Reference in New Issue