diff --git a/CMakeLists.txt b/CMakeLists.txt
index d706359c..b4006dd8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -240,6 +240,7 @@ set(COMMON src/common/logging/backend.cpp
            src/common/rdtsc.cpp
            src/common/rdtsc.h
            src/common/singleton.h
+           src/common/slot_vector.h
            src/common/string_util.cpp
            src/common/string_util.h
            src/common/thread.cpp
@@ -420,7 +421,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
                src/video_core/texture_cache/image_view.h
                src/video_core/texture_cache/sampler.cpp
                src/video_core/texture_cache/sampler.h
-               src/video_core/texture_cache/slot_vector.h
                src/video_core/texture_cache/texture_cache.cpp
                src/video_core/texture_cache/texture_cache.h
                src/video_core/texture_cache/tile_manager.cpp
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/common/slot_vector.h
similarity index 100%
rename from src/video_core/texture_cache/slot_vector.h
rename to src/common/slot_vector.h
diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp
index ceafa7ba..fe9780f5 100644
--- a/src/core/address_space.cpp
+++ b/src/core/address_space.cpp
@@ -113,8 +113,12 @@ struct AddressSpace::Impl {
         return ptr;
     }
 
-    void* MapPrivate(VAddr virtual_addr, size_t size, u64 alignment, ULONG prot) {
+    // Maps a private allocation. With no_commit the range is only reserved; a non-zero
+    // virtual_addr requests placement at exactly that address.
+    void* MapPrivate(VAddr virtual_addr, size_t size, u64 alignment, ULONG prot,
+                     bool no_commit = false) {
         // Map a private allocation
+        PVOID addr = reinterpret_cast<PVOID>(virtual_addr);
         MEM_ADDRESS_REQUIREMENTS req{};
         MEM_EXTENDED_PARAMETER param{};
         // req.LowestStartingAddress =
@@ -124,9 +128,32 @@ struct AddressSpace::Impl {
         req.Alignment = alignment < 64_KB ? 0 : alignment;
         param.Type = MemExtendedParameterAddressRequirements;
         param.Pointer = &req;
-        ULONG alloc_type = MEM_COMMIT | MEM_RESERVE | (alignment > 2_MB ? MEM_LARGE_PAGES : 0);
-        void* const ptr = VirtualAlloc2(process, nullptr, size, alloc_type, prot, &param, 1);
-        ASSERT_MSG(ptr, "{}", Common::GetLastErrorMsg());
+        ULONG alloc_type = MEM_RESERVE | (alignment > 2_MB ? MEM_LARGE_PAGES : 0);
+        if (!no_commit) {
+            alloc_type |= MEM_COMMIT;
+        }
+        // Check if the area has been reserved beforehand (typically for tessellation buffer)
+        // and in that case don't reserve it again as Windows complains.
+        if (virtual_addr) {
+            MEMORY_BASIC_INFORMATION info{};
+            // VirtualQuery returns 0 on failure; only trust info when the query succeeded.
+            const bool queried = VirtualQuery(addr, &info, sizeof(info)) != 0;
+            if (queried && info.State == MEM_RESERVE) {
+                alloc_type &= ~MEM_RESERVE;
+            }
+        }
+        // MEM_LARGE_PAGES is only accepted together with both MEM_RESERVE and MEM_COMMIT.
+        if ((alloc_type & (MEM_RESERVE | MEM_COMMIT)) != (MEM_RESERVE | MEM_COMMIT)) {
+            alloc_type &= ~MEM_LARGE_PAGES;
+        }
+        void* ptr{};
+        if (virtual_addr) {
+            ptr = VirtualAlloc2(process, addr, size, alloc_type, prot, nullptr, 0);
+            ASSERT_MSG(ptr && VAddr(ptr) == virtual_addr, "{}", Common::GetLastErrorMsg());
+        } else {
+            ptr = VirtualAlloc2(process, nullptr, size, alloc_type, prot, &param, 1);
+            ASSERT_MSG(ptr, "{}", Common::GetLastErrorMsg());
+        }
         return ptr;
     }
 
@@ -224,7 +251,8 @@ struct AddressSpace::Impl {
         return nullptr;
     }
 
-    void* MapPrivate(VAddr virtual_addr, size_t size, u64 alignment, PosixPageProtection prot) {
+    void* MapPrivate(VAddr virtual_addr, size_t size, u64 alignment, PosixPageProtection prot,
+                     bool no_commit = false) {
         UNREACHABLE();
         return nullptr;
     }
@@ -271,4 +299,8 @@ void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission per
     return impl->Protect(virtual_addr, size, true, true, true);
 }
 
+void* AddressSpace::Reserve(size_t size, u64 alignment) {
+    return impl->MapPrivate(0, size, alignment, PAGE_READWRITE, true);
+}
+
 } // namespace Core
diff --git a/src/core/address_space.h b/src/core/address_space.h
index 322ab9c7..0e344358 100644
--- a/src/core/address_space.h
+++ b/src/core/address_space.h
@@ -49,6 +49,9 @@ public:
 
     void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms);
 
+    /// Reserves a private region without committing it (calls MapPrivate with no_commit set).
+    void* Reserve(size_t size, u64 alignment);
+
 private:
     struct Impl;
     std::unique_ptr<Impl> impl;
diff --git a/src/core/libraries/error_codes.h b/src/core/libraries/error_codes.h
index d4ae7edc..e4aac622 100644
--- a/src/core/libraries/error_codes.h
+++ b/src/core/libraries/error_codes.h
@@ -246,6 +246,13 @@ constexpr int SCE_VIDEO_OUT_ERROR_SLOT_OCCUPIED = 0x80290010; // slot alr
 constexpr int SCE_VIDEO_OUT_ERROR_FLIP_QUEUE_FULL = 0x80290012; // flip queue is full
 constexpr int SCE_VIDEO_OUT_ERROR_INVALID_OPTION = 0x8029001A; // Invalid buffer attribute option
 
+// GnmDriver
+constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_PIPE_ID = 0x80D17000;
+constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_QUEUE_ID = 0x80D17001;
+constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_RING_SIZE = 0x80D17002;
+constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_RING_BASE_ADDR = 0x80D17003;
+constexpr int ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR = 0x80D17004;
+
 // Generic
 constexpr int ORBIS_OK = 0x00000000;
 constexpr int ORBIS_FAIL = 0xFFFFFFFF;
diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp
index 4df2709f..c33a0546 100644
--- a/src/core/libraries/gnmdriver/gnmdriver.cpp
+++ b/src/core/libraries/gnmdriver/gnmdriver.cpp
@@ -5,10 +5,12 @@
 #include "common/config.h"
 #include "common/logging/log.h"
 #include "common/path_util.h"
+#include "common/slot_vector.h"
 #include "core/libraries/error_codes.h"
 #include "core/libraries/gnmdriver/gnmdriver.h"
 #include "core/libraries/libs.h"
 #include "core/libraries/videoout/video_out.h"
+#include "core/memory.h"
 #include "core/platform.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/amdgpu/pm4_cmds.h"
@@ -32,6 +34,18 @@ static constexpr bool g_fair_hw_init = false;
 static u32 submission_lock{};
 static u64 frames_submitted{}; // frame counter
 
+// Parameters of a compute queue registered through sceGnmMapComputeQueue.
+struct AscQueueInfo {
+    VAddr map_addr;
+    u32* read_addr;
+    u32 ring_size_dw;
+};
+static VideoCore::SlotVector<AscQueueInfo> asc_queues{};
+
+static constexpr u32 TessellationFactorRingSize = 128_KB;
+static constexpr u32 TessellationFactorRingAlignment = 64_KB; // toolkit is using this alignment
+static VAddr tessellation_factors_ring_addr{0};
+
 static void DumpCommandList(std::span cmd_list, const std::string& postfix) {
     using namespace Common::FS;
     const auto dump_dir = GetUserPath(PathType::PM4Dir);
@@ -367,9 +381,18 @@ int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175() {
-    LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
-    return ORBIS_OK;
+u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
+    LOG_TRACE(Lib_GnmDriver, "called");
+
+    if (size > 0xff) {
+        if constexpr (g_fair_hw_init) {
+            ASSERT_MSG(0, "Not implemented");
+        } else {
+            cmdbuf = WriteHeader(cmdbuf, 0xff);
+        }
+        return 0x100; // it is a size, not a retcode
+    }
+    return 0;
 }
 
 u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
@@ -379,7 +402,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
         if constexpr (g_fair_hw_init) {
             ASSERT_MSG(0, "Not implemented");
         } else {
-            cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff);
+            cmdbuf = WriteHeader(cmdbuf, 0xff);
         }
         return 0x100; // it is a size, not a retcode
     }
@@ -393,7 +416,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
         if constexpr (g_fair_hw_init) {
             ASSERT_MSG(0, "Not implemented");
         } else {
-            cmdbuf = cmdbuf = WriteHeader(cmdbuf, 0xff);
+            cmdbuf = WriteHeader(cmdbuf, 0xff);
         }
         return 0x100; // it is a size, not a retcode
     }
@@ -599,9 +622,15 @@ int PS4_SYSV_ABI sceGnmGetShaderStatus() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() {
-    LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
-    return ORBIS_OK;
+VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress() {
+    LOG_TRACE(Lib_GnmDriver, "called");
+    // Actual virtual buffer address is hardcoded in the driver to 0xff00'000
+    if (tessellation_factors_ring_addr == 0) {
+        auto* memory = Core::Memory::Instance();
+        tessellation_factors_ring_addr =
+            memory->Reserve(TessellationFactorRingSize, TessellationFactorRingAlignment);
+    }
+    return tessellation_factors_ring_addr;
 }
 
 int PS4_SYSV_ABI sceGnmGpuPaDebugEnter() {
@@ -718,14 +747,46 @@ int PS4_SYSV_ABI sceGnmLogicalTcaUnitToPhysical() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI sceGnmMapComputeQueue() {
-    LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
-    return ORBIS_OK;
+// Validates the queue parameters and records them in asc_queues.
+// Returns the index of the new slot on success or an ORBIS_GNM_ERROR_COMPUTEQUEUE_* code.
+int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_base_addr,
+                                       u32 ring_size_dw, u32* read_ptr_addr) {
+    LOG_TRACE(Lib_GnmDriver, "called");
+
+    if (pipe_id >= Liverpool::NumComputePipes) {
+        return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_PIPE_ID;
+    }
+
+    if (queue_id >= Liverpool::NumQueuesPerPipe) {
+        return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_QUEUE_ID;
+    }
+
+    if (ring_base_addr % 256 != 0) { // alignment check
+        return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_RING_BASE_ADDR;
+    }
+
+    if (!std::has_single_bit(ring_size_dw)) {
+        return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_RING_SIZE;
+    }
+
+    if (VAddr(read_ptr_addr) % 4 != 0) { // alignment check
+        return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
+    }
+
+    auto vqid = asc_queues.insert(ring_base_addr, read_ptr_addr, ring_size_dw);
+    LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
+             vqid.index);
+
+    return vqid.index;
 }
 
-int PS4_SYSV_ABI sceGnmMapComputeQueueWithPriority() {
-    LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
-    return ORBIS_OK;
+int PS4_SYSV_ABI sceGnmMapComputeQueueWithPriority(u32 pipe_id, u32 queue_id, VAddr ring_base_addr,
+                                                   u32 ring_size_dw, u32* read_ptr_addr,
+                                                   u32 pipePriority) {
+    LOG_TRACE(Lib_GnmDriver, "called");
+
+    (void)pipePriority;
+    return sceGnmMapComputeQueue(pipe_id, queue_id, ring_base_addr, ring_size_dw, read_ptr_addr);
 }
 
 int PS4_SYSV_ABI sceGnmPaDisableFlipCallbacks() {
diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h
index 0f0e454e..ce8944f5 100644
--- a/src/core/libraries/gnmdriver/gnmdriver.h
+++ b/src/core/libraries/gnmdriver/gnmdriver.h
@@ -54,7 +54,7 @@ int PS4_SYSV_ABI sceGnmDrawIndirect();
 int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti();
 int PS4_SYSV_ABI sceGnmDrawIndirectMulti();
 int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState();
-int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175();
+u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size);
 u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size);
 u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
 int PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState();
@@ -97,7 +97,7 @@ int PS4_SYSV_ABI sceGnmGetResourceType();
 int PS4_SYSV_ABI sceGnmGetResourceUserData();
 int PS4_SYSV_ABI sceGnmGetShaderProgramBaseAddress();
 int PS4_SYSV_ABI sceGnmGetShaderStatus();
-int PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress();
+VAddr PS4_SYSV_ABI sceGnmGetTheTessellationFactorRingBufferBaseAddress();
 int PS4_SYSV_ABI sceGnmGpuPaDebugEnter();
 int PS4_SYSV_ABI sceGnmGpuPaDebugLeave();
 int PS4_SYSV_ABI sceGnmInsertDingDongMarker();
@@ -113,8 +113,11 @@ int PS4_SYSV_ABI sceGnmIsUserPaEnabled();
 int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex();
 int PS4_SYSV_ABI sceGnmLogicalCuMaskToPhysicalCuMask();
 int PS4_SYSV_ABI sceGnmLogicalTcaUnitToPhysical();
-int PS4_SYSV_ABI sceGnmMapComputeQueue();
-int PS4_SYSV_ABI sceGnmMapComputeQueueWithPriority();
+int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_base_addr,
+                                       u32 ring_size_dw, u32* read_ptr_addr);
+int PS4_SYSV_ABI sceGnmMapComputeQueueWithPriority(u32 pipe_id, u32 queue_id, VAddr ring_base_addr,
+                                                   u32 ring_size_dw, u32* read_ptr_addr,
+                                                   u32 pipePriority);
 int PS4_SYSV_ABI sceGnmPaDisableFlipCallbacks();
 int PS4_SYSV_ABI sceGnmPaEnableFlipCallbacks();
 int PS4_SYSV_ABI sceGnmPaHeartbeat();
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index acae3b52..5180a633 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -92,10 +92,9 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
             it++;
         }
         ASSERT(it != vma_map.end());
-        if (alignment > 0) {
-            ASSERT_MSG(it->second.base % alignment == 0, "Free region base is not aligned");
-        }
-        mapped_addr = it->second.base;
+        // NOTE(review): the free-region search above is not visible in this hunk; if it only
+        // compares against `size`, the aligned-up base may no longer fit the region - confirm.
+        mapped_addr = alignment > 0 ? Common::AlignUp(it->second.base, alignment) : it->second.base;
     }
 
     // Perform the mapping.
diff --git a/src/core/memory.h b/src/core/memory.h
index 7a623db7..87cd9188 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -115,6 +115,11 @@ public:
 
     int DirectMemoryQuery(PAddr addr, bool find_next, Libraries::Kernel::OrbisQueryInfo* out_info);
 
+    /// Forwards to impl.Reserve() and returns the resulting pointer as a VAddr.
+    VAddr Reserve(size_t size, u64 alignment) {
+        return reinterpret_cast<VAddr>(impl.Reserve(size, alignment));
+    }
+
     std::pair GetVulkanBuffer(VAddr addr);
 
 private:
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 20ae5bd5..d37acd21 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,12 +7,12 @@
 #include
 #include
 
+#include "common/slot_vector.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/texture_cache/image.h"
 #include "video_core/texture_cache/image_view.h"
 #include "video_core/texture_cache/sampler.h"
-#include "video_core/texture_cache/slot_vector.h"
 #include "video_core/texture_cache/tile_manager.h"
 
 namespace Core::Libraries::VideoOut {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 4ba856c0..335d6d83 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -3,8 +3,8 @@
 
 #pragma once
 
+#include "common/slot_vector.h"
 #include "common/types.h"
-#include "video_core/texture_cache/slot_vector.h"
 
 namespace VideoCore {
 