This commit is contained in:
IndecisiveTurtle 2024-07-04 15:32:01 +03:00
parent 414c7bd825
commit 58dcd6473d
38 changed files with 209 additions and 63 deletions

View File

@ -28,7 +28,7 @@ constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
// User area size is normally larger than this. However games are unlikely to map to high // User area size is normally larger than this. However games are unlikely to map to high
// regions of that area, so by default we allocate a smaller virtual address space (about 1/4th). // regions of that area, so by default we allocate a smaller virtual address space (about 1/4th).
// to save space on page tables. // to save space on page tables.
static constexpr size_t UserSize = 1ULL << 38; static constexpr size_t UserSize = 1ULL << 39;
static constexpr size_t SystemSize = USER_MIN - SYSTEM_MANAGED_MIN; static constexpr size_t SystemSize = USER_MIN - SYSTEM_MANAGED_MIN;
/** /**

View File

@ -13,6 +13,7 @@ void MntPoints::Mount(const std::filesystem::path& host_folder, const std::strin
MntPair pair; MntPair pair;
pair.host_path = host_folder.string(); pair.host_path = host_folder.string();
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
pair.guest_path = guest_folder; pair.guest_path = guest_folder;
m_mnt_pairs.push_back(pair); m_mnt_pairs.push_back(pair);
@ -40,17 +41,36 @@ std::string MntPoints::GetHostDirectory(const std::string& guest_directory) {
return ""; return "";
} }
// Returns a lower-cased copy of `str`.
// Every character is widened through unsigned char before being handed to std::tolower,
// because std::tolower is only defined for values representable as unsigned char (or EOF).
std::string ToLower(std::string str) {
    for (char& ch : str) {
        const auto as_uchar = static_cast<unsigned char>(ch);
        ch = static_cast<char>(std::tolower(as_uchar));
    }
    return str;
}
std::string MntPoints::GetHostFile(const std::string& guest_file) { std::string MntPoints::GetHostFile(const std::string& guest_file) {
std::scoped_lock lock{m_mutex}; std::scoped_lock lock{m_mutex};
for (auto& pair : m_mnt_pairs) { for (auto& pair : m_mnt_pairs) {
// horrible code but it works :D // horrible code but it works :D
int find = guest_file.find(pair.guest_path); int find = guest_file.find(pair.guest_path);
if (find == 0) { if (find != 0) {
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1); continue;
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
return pair.host_path + npath;
} }
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
const auto host_path = pair.host_path + npath;
#ifndef _WIN64
const std::filesystem::path path{host_path};
if (!std::filesystem::exists(path)) {
const auto filename = ToLower(path.filename());
for (const auto& file : std::filesystem::directory_iterator(path.parent_path())) {
const auto exist_filename = ToLower(file.path().filename());
if (filename == exist_filename) {
return file.path();
}
}
}
#endif
return host_path;
} }
return ""; return "";
} }

View File

@ -344,8 +344,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
} }
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed() { int PS4_SYSV_ABI sceGnmAreSubmitsAllowed() {
LOG_TRACE(Lib_GnmDriver, "called"); LOG_TRACE(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
return submission_lock == 0; return liverpool->IsGpuIdle();
} }
int PS4_SYSV_ABI sceGnmBeginWorkload() { int PS4_SYSV_ABI sceGnmBeginWorkload() {
@ -803,9 +803,9 @@ int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress() { bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_OK; return false;
} }
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface() { int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface() {
@ -1930,8 +1930,10 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
} }
} }
LOG_INFO(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
if (submission_lock != 0) { if (submission_lock != 0) {
liverpool->WaitGpuIdle(); liverpool->WaitGpuIdle();
LOG_INFO(Lib_GnmDriver, "Done waiting for GPU");
// Suspend logic goes here // Suspend logic goes here

View File

@ -63,7 +63,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size); u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size);
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size); u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size);
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto(); int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress(); bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface(); int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuDebugger(); int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuDebugger();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuException(); int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuException();

View File

@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
file->m_host_name = mnt->GetHostFile(file->m_guest_name); file->m_host_name = mnt->GetHostFile(file->m_guest_name);
if (read) { if (read) {
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read); file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read);
} else if (write && create && truncate) { } else if (write && create) {
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write); file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write);
} else if (write && create && append) { // CUSA04729 (appends app0/shaderlist.txt) } else if (write && create && append) { // CUSA04729 (appends app0/shaderlist.txt)
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append); file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append);
@ -90,7 +90,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
} }
if (!file->f.IsOpen()) { if (!file->f.IsOpen()) {
h->DeleteHandle(handle); h->DeleteHandle(handle);
return SCE_KERNEL_ERROR_EACCES; return SCE_KERNEL_ERROR_ENOENT;
} }
} }
file->is_opened = true; file->is_opened = true;

View File

@ -110,10 +110,13 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i
} }
} }
const VAddr in_addr = reinterpret_cast<VAddr>(*addr); VAddr in_addr = reinterpret_cast<VAddr>(*addr);
const auto mem_prot = static_cast<Core::MemoryProt>(prot); const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags); const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
auto* memory = Core::Memory::Instance(); auto* memory = Core::Memory::Instance();
if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
//in_addr = 0x880000000;
}
return memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "", return memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "",
false, directMemoryStart, alignment); false, directMemoryStart, alignment);
} }
@ -143,10 +146,13 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
return ORBIS_KERNEL_ERROR_EFAULT; return ORBIS_KERNEL_ERROR_EFAULT;
} }
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out); VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const auto mem_prot = static_cast<Core::MemoryProt>(prot); const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags); const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
auto* memory = Core::Memory::Instance(); auto* memory = Core::Memory::Instance();
if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
//in_addr = 0x880000000;
}
const int ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags, const int ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
Core::VMAType::Flexible, name); Core::VMAType::Flexible, name);
@ -161,7 +167,6 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
} }
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) { int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
LOG_WARNING(Kernel_Vmm, "called");
auto* memory = Core::Memory::Instance(); auto* memory = Core::Memory::Instance();
return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot); return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
} }

View File

@ -5,6 +5,7 @@
#include <thread> #include <thread>
#include <semaphore.h> #include <semaphore.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/alignment.h"
#include "common/error.h" #include "common/error.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/singleton.h" #include "common/singleton.h"
@ -16,6 +17,8 @@
#include "core/linker.h" #include "core/linker.h"
#ifdef _WIN64 #ifdef _WIN64
#include <windows.h> #include <windows.h>
#else
#include <sys/mman.h>
#endif #endif
namespace Libraries::Kernel { namespace Libraries::Kernel {
@ -46,7 +49,8 @@ void init_pthreads() {
} }
void pthreadInitSelfMainThread() { void pthreadInitSelfMainThread() {
g_pthread_self = new PthreadInternal{}; auto* pthread_pool = g_pthread_cxt->GetPthreadPool();
g_pthread_self = pthread_pool->Create();
scePthreadAttrInit(&g_pthread_self->attr); scePthreadAttrInit(&g_pthread_self->attr);
g_pthread_self->pth = pthread_self(); g_pthread_self->pth = pthread_self();
g_pthread_self->name = "Main_Thread"; g_pthread_self->name = "Main_Thread";
@ -978,7 +982,14 @@ ScePthread PThreadPool::Create() {
} }
} }
#ifndef _WIN64
auto* ret = new PthreadInternal{}; auto* ret = new PthreadInternal{};
#else
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
auto* ret = reinterpret_cast<PthreadInternal*>(mmap(hint_address, sizeof(PthreadInternal),
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
#endif
ret->is_free = false; ret->is_free = false;
ret->is_detached = false; ret->is_detached = false;

View File

@ -42,7 +42,7 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> {
template <StringLiteral name, class F, F f> template <StringLiteral name, class F, F f>
constexpr auto wrapper = wrapper_impl<name, F, f>::wrap; constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
// #define W(foo) wrapper<#foo, decltype(&foo), foo> //#define W(foo) wrapper<#foo, decltype(&foo), foo>
#define W(foo) foo #define W(foo) foo
#define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \ #define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \

View File

@ -10,6 +10,7 @@
#include <arpa/inet.h> #include <arpa/inet.h>
#endif #endif
#include <thread>
#include <common/assert.h> #include <common/assert.h>
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
@ -59,6 +60,7 @@ int PS4_SYSV_ABI sce_net_in6addr_nodelocal_allnodes() {
} }
OrbisNetId PS4_SYSV_ABI sceNetAccept(OrbisNetId s, OrbisNetSockaddr* addr, u32* paddrlen) { OrbisNetId PS4_SYSV_ABI sceNetAccept(OrbisNetId s, OrbisNetSockaddr* addr, u32* paddrlen) {
std::this_thread::sleep_for(std::chrono::seconds(60));
LOG_ERROR(Lib_Net, "(STUBBED) called"); LOG_ERROR(Lib_Net, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
@ -559,7 +561,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
} }
int PS4_SYSV_ABI sceNetEpollWait() { int PS4_SYSV_ABI sceNetEpollWait() {
LOG_ERROR(Lib_Net, "(STUBBED) called"); //LOG_ERROR(Lib_Net, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -341,6 +341,7 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
switch (mount_mode) { switch (mount_mode) {
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY: case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR: case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: { case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: {
if (!std::filesystem::exists(mount_dir)) { if (!std::filesystem::exists(mount_dir)) {
return ORBIS_SAVE_DATA_ERROR_NOT_FOUND; return ORBIS_SAVE_DATA_ERROR_NOT_FOUND;
@ -349,11 +350,13 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
mnt->Mount(mount_dir, g_mount_point); mnt->Mount(mount_dir, g_mount_point);
mount_result->mount_status = 0; mount_result->mount_status = 0;
strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16); std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
} break; break;
}
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE: case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY: case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR: case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON: ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF | case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF |

View File

@ -137,8 +137,9 @@ void Emulator::Run(const std::filesystem::path& file) {
} }
void Emulator::LoadSystemModules(const std::filesystem::path& file) { void Emulator::LoadSystemModules(const std::filesystem::path& file) {
constexpr std::array<SysModules, 6> ModulesToLoad{ constexpr std::array<SysModules, 7> ModulesToLoad{
{{"libSceNgs2.sprx", nullptr}, {{"libSceNgs2.sprx", nullptr},
{"libSceFiber.sprx", nullptr},
{"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal},
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},

View File

@ -176,6 +176,10 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{}; spv::ExecutionModel execution_model{};
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
ctx.AddCapability(spv::Capability::Float16);
ctx.AddCapability(spv::Capability::Int16);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
switch (program.info.stage) { switch (program.info.stage) {

View File

@ -18,8 +18,8 @@ void EmitBitCastU64F64(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction"); UNREACHABLE_MSG("SPIR-V Instruction");
} }
void EmitBitCastF16U16(EmitContext&) { Id EmitBitCastF16U16(EmitContext& ctx, Id value) {
UNREACHABLE_MSG("SPIR-V Instruction"); return ctx.OpBitcast(ctx.F16[1], value);
} }
Id EmitBitCastF32U32(EmitContext& ctx, Id value) { Id EmitBitCastF32U32(EmitContext& ctx, Id value) {

View File

@ -60,7 +60,11 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
case IR::Attribute::RenderTarget0: case IR::Attribute::RenderTarget0:
case IR::Attribute::RenderTarget1: case IR::Attribute::RenderTarget1:
case IR::Attribute::RenderTarget2: case IR::Attribute::RenderTarget2:
case IR::Attribute::RenderTarget3: { case IR::Attribute::RenderTarget3:
case IR::Attribute::RenderTarget4:
case IR::Attribute::RenderTarget5:
case IR::Attribute::RenderTarget6:
case IR::Attribute::RenderTarget7: {
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0); const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
if (ctx.frag_num_comp[index] > 1) { if (ctx.frag_num_comp[index] > 1) {
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element)); return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
@ -196,7 +200,15 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
} }
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
UNREACHABLE(); const auto info = inst->Flags<IR::BufferInstInfo>();
const auto& buffer = ctx.buffers[handle];
boost::container::static_vector<Id, 2> ids;
for (u32 i = 0; i < 2; i++) {
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
}
return ctx.OpCompositeConstruct(buffer.data_types->Get(2), ids);
} }
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {

View File

@ -255,4 +255,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F64[1], value); return ctx.OpConvertUToF(ctx.F64[1], value);
} }
// Emits an OpUConvert on `value` using ctx.U16 as the result type and returns the new id.
// As the name indicates, `value` is expected to be a 32-bit unsigned integer.
Id EmitConvertU16U32(EmitContext& ctx, Id value) {
return ctx.OpUConvert(ctx.U16, value);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -6,6 +6,11 @@
namespace Shader::Backend::SPIRV { namespace Shader::Backend::SPIRV {
// Applies the SPIR-V NoContraction decoration to the result id `op` and returns `op`
// unchanged, so the call can wrap an emit expression inline.
// Per the SPIR-V specification, NoContraction forbids combining the decorated operation
// with others (for example fusing a multiply and an add into one instruction).
// NOTE(review): `inst` is accepted but not used here.
Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
ctx.Decorate(op, spv::Decoration::NoContraction);
return op;
}
Id EmitFPAbs16(EmitContext& ctx, Id value) { Id EmitFPAbs16(EmitContext& ctx, Id value) {
return ctx.OpFAbs(ctx.F16[1], value); return ctx.OpFAbs(ctx.F16[1], value);
} }
@ -19,31 +24,31 @@ Id EmitFPAbs64(EmitContext& ctx, Id value) {
} }
Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F16[1], a, b); return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
} }
Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F32[1], a, b); return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
} }
Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F64[1], a, b); return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
} }
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFSub(ctx.F32[1], a, b); return Decorate(ctx, inst, ctx.OpFSub(ctx.F32[1], a, b));
} }
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F16[1], a, b, c); return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
} }
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F32[1], a, b, c); return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
} }
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F64[1], a, b, c); return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
} }
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
@ -63,15 +68,15 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
} }
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F16[1], a, b); return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
} }
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F32[1], a, b); return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
} }
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F64[1], a, b); return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
} }
Id EmitFPNeg16(EmitContext& ctx, Id value) { Id EmitFPNeg16(EmitContext& ctx, Id value) {

View File

@ -141,7 +141,7 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id color_type = texture.data_types->Get(4); const Id color_type = texture.data_types->Get(4);
ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), ctx.OpBitcast(color_type, color)); ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
} }
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -146,7 +146,7 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
void EmitBitCastU16F16(EmitContext& ctx); void EmitBitCastU16F16(EmitContext& ctx);
Id EmitBitCastU32F32(EmitContext& ctx, Id value); Id EmitBitCastU32F32(EmitContext& ctx, Id value);
void EmitBitCastU64F64(EmitContext& ctx); void EmitBitCastU64F64(EmitContext& ctx);
void EmitBitCastF16U16(EmitContext&); Id EmitBitCastF16U16(EmitContext& ctx, Id value);
Id EmitBitCastF32U32(EmitContext& ctx, Id value); Id EmitBitCastF32U32(EmitContext& ctx, Id value);
void EmitBitCastF64U64(EmitContext& ctx); void EmitBitCastF64U64(EmitContext& ctx);
Id EmitPackUint2x32(EmitContext& ctx, Id value); Id EmitPackUint2x32(EmitContext& ctx, Id value);
@ -343,6 +343,7 @@ Id EmitConvertF64U8(EmitContext& ctx, Id value);
Id EmitConvertF64U16(EmitContext& ctx, Id value); Id EmitConvertF64U16(EmitContext& ctx, Id value);
Id EmitConvertF64U32(EmitContext& ctx, Id value); Id EmitConvertF64U32(EmitContext& ctx, Id value);
Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset); Id offset);

View File

@ -74,19 +74,19 @@ Id EmitContext::Def(const IR::Value& value) {
void EmitContext::DefineArithmeticTypes() { void EmitContext::DefineArithmeticTypes() {
void_id = Name(TypeVoid(), "void_id"); void_id = Name(TypeVoid(), "void_id");
U1[1] = Name(TypeBool(), "bool_id"); U1[1] = Name(TypeBool(), "bool_id");
// F16[1] = Name(TypeFloat(16), "f16_id"); F16[1] = Name(TypeFloat(16), "f16_id");
F32[1] = Name(TypeFloat(32), "f32_id"); F32[1] = Name(TypeFloat(32), "f32_id");
// F64[1] = Name(TypeFloat(64), "f64_id"); // F64[1] = Name(TypeFloat(64), "f64_id");
S32[1] = Name(TypeSInt(32), "i32_id"); S32[1] = Name(TypeSInt(32), "i32_id");
U32[1] = Name(TypeUInt(32), "u32_id"); U32[1] = Name(TypeUInt(32), "u32_id");
// U8 = Name(TypeSInt(8), "u8"); // U8 = Name(TypeSInt(8), "u8");
// S8 = Name(TypeUInt(8), "s8"); // S8 = Name(TypeUInt(8), "s8");
// U16 = Name(TypeUInt(16), "u16_id"); U16 = Name(TypeUInt(16), "u16_id");
// S16 = Name(TypeSInt(16), "s16_id"); // S16 = Name(TypeSInt(16), "s16_id");
// U64 = Name(TypeUInt(64), "u64_id"); // U64 = Name(TypeUInt(64), "u64_id");
for (u32 i = 2; i <= 4; i++) { for (u32 i = 2; i <= 4; i++) {
// F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i)); F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i)); F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
// F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i)); // F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i)); S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));

View File

@ -32,7 +32,7 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions. * We take the reverse way, extract the original input semantics from these instructions.
**/ **/
std::vector<VertexAttribute> ParseFetchShader(const u32* code) { std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
std::vector<VertexAttribute> attributes; std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max()); GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder; GcnDecodeContext decoder;
@ -47,6 +47,8 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
u32 semantic_index = 0; u32 semantic_index = 0;
while (!code_slice.atEnd()) { while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice); const auto inst = decoder.decodeInstruction(code_slice);
*out_size += inst.length;
if (inst.opcode == Opcode::S_SETPC_B64) { if (inst.opcode == Opcode::S_SETPC_B64) {
break; break;
} }

View File

@ -17,6 +17,6 @@ struct VertexAttribute {
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
}; };
std::vector<VertexAttribute> ParseFetchShader(const u32* code); std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -5,20 +5,29 @@
namespace Shader::Gcn { namespace Shader::Gcn {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd; const auto& smrd = inst.control.smrd;
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported"); const u32 dword_offset = [&] -> u32 {
if (smrd.imm) {
return smrd.offset;
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
}
UNREACHABLE();
}();
const IR::ScalarReg sbase{inst.src[0].code * 2}; const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base = const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1)); ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code}; IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) { for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i))); ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
} }
} }
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
const auto& smrd = inst.control.smrd; const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2}; const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset = [&] -> IR::U32 { const IR::U32 dword_offset = [&] -> IR::U32 {

View File

@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
@ -190,7 +193,20 @@ void Translator::EmitFetch(const GcnInst& inst) {
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes. // Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code); u32 fetch_size{};
const auto attribs = ParseFetchShader(code, &fetch_size);
if (Config::dumpShaders()) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size);
}
for (const auto& attrib : attribs) { for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr}; IR::VectorReg dst_reg{attrib.dest_vgpr};
@ -242,6 +258,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
} }
Translator translator{block, info}; Translator translator{block, info};
for (const auto& inst : inst_list) { for (const auto& inst : inst_list) {
block_base += inst.length;
switch (inst.opcode) { switch (inst.opcode) {
case Opcode::S_MOVK_I32: case Opcode::S_MOVK_I32:
translator.S_MOVK(inst); translator.S_MOVK(inst);
@ -378,6 +395,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::IMAGE_SAMPLE: case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L: case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O: case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
translator.IMAGE_SAMPLE(inst); translator.IMAGE_SAMPLE(inst);
break; break;
case Opcode::IMAGE_ATOMIC_ADD: case Opcode::IMAGE_ATOMIC_ADD:
@ -527,6 +545,12 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_CNDMASK_B32: case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst); translator.V_CNDMASK_B32(inst);
break; break;
case Opcode::TBUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XY:
translator.BUFFER_LOAD_FORMAT(2, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZ: case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, true, inst); translator.BUFFER_LOAD_FORMAT(3, true, inst);
break; break;
@ -901,7 +925,6 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
magic_enum::enum_name(inst.opcode), opcode); magic_enum::enum_name(inst.opcode), opcode);
info.translation_failed = true; info.translation_failed = true;
} }
block_base += inst.length;
} }
} }

View File

@ -28,7 +28,8 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
void Translator::V_CVT_F32_F16(const GcnInst& inst) { void Translator::V_CVT_F32_F16(const GcnInst& inst) {
const IR::U32 src0 = GetSrc(inst.src[0]); const IR::U32 src0 = GetSrc(inst.src[0]);
SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0)); const IR::U16 src0l = ir.UConvert(16, src0);
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
} }
void Translator::V_MUL_F32(const GcnInst& inst) { void Translator::V_MUL_F32(const GcnInst& inst) {
@ -519,7 +520,7 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)}; const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src1{GetSrc(inst.src[1])};
ir.SetVcc(ir.Imm1(false)); ir.SetVcc(ir.FPIsInf(src0));
// TODO // TODO
} }

View File

@ -1194,6 +1194,13 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
} }
U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) { U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
switch (result_bitsize) {
case 16:
switch (value.Type()) {
case Type::U32:
return Inst<U16>(Opcode::ConvertU16U32, value);
}
}
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
} }

View File

@ -283,6 +283,7 @@ OPCODE(ConvertF32U32, F32, U32,
OPCODE(ConvertF64S32, F64, U32, ) OPCODE(ConvertF64S32, F64, U32, )
OPCODE(ConvertF64U32, F64, U32, ) OPCODE(ConvertF64U32, F64, U32, )
OPCODE(ConvertF32U16, F32, U16, ) OPCODE(ConvertF32U16, F32, U16, )
OPCODE(ConvertU16U32, U16, U32, )
// Image operations // Image operations
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )

View File

@ -234,7 +234,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, Am
**/ **/
IR::Inst* handle = inst.Arg(0).InstRecursive(); IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* p0 = handle->Arg(0).InstRecursive(); IR::Inst* p0 = handle->Arg(0).InstRecursive();
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate()) { if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() || !p0->Arg(1).IsImmediate()) {
return -1; return -1;
} }
IR::Inst* p1 = handle->Arg(1).InstRecursive(); IR::Inst* p1 = handle->Arg(1).InstRecursive();
@ -286,7 +286,9 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
if (inst_info.is_typed) { if (inst_info.is_typed) {
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float && ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
(inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 || (inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32)); inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32));
} }
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer || if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) { inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {

View File

@ -165,6 +165,7 @@ struct Info {
Stage stage; Stage stage;
uintptr_t pgm_base{}; uintptr_t pgm_base{};
u64 pgm_hash{};
u32 shared_memory_size{}; u32 shared_memory_size{};
bool uses_group_quad{}; bool uses_group_quad{};
bool uses_shared_u8{}; bool uses_shared_u8{};

View File

@ -479,7 +479,7 @@ struct Liverpool {
template <typename T = VAddr> template <typename T = VAddr>
T Address() const { T Address() const {
return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32); return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
} }
}; };

View File

@ -329,6 +329,10 @@ struct PM4CmdEventWriteEop {
*Address<u64>() = Common::FencedRDTSC(); *Address<u64>() = Common::FencedRDTSC();
break; break;
} }
case DataSelect::GpuClock64: {
*Address<u64>() = 0;
break;
}
default: { default: {
UNREACHABLE(); UNREACHABLE();
} }
@ -549,8 +553,8 @@ struct PM4DumpConstRam {
u32 addr_hi; u32 addr_hi;
template <typename T> template <typename T>
T* Address() const { T Address() const {
return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo); return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
} }
[[nodiscard]] u32 Offset() const { [[nodiscard]] u32 Offset() const {

View File

@ -63,7 +63,7 @@ struct Buffer {
if (stride == 0) { if (stride == 0) {
return 1U; return 1U;
} }
ASSERT(stride % element_size == 0); //ASSERT(stride % element_size == 0);
return stride / element_size; return stride / element_size;
} }

View File

@ -404,6 +404,26 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Unorm) { num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR4G4B4A4UnormPack16; return vk::Format::eR4G4B4A4UnormPack16;
} }
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR16G16B16A16Uint;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32A32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR8Sint;
}
if (data_format == AmdGpu::DataFormat::FormatBc1 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc1RgbaSrgbBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16Sint;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
} }

View File

@ -209,12 +209,14 @@ bool Instance::CreateDevice() {
.shaderImageGatherExtended = true, .shaderImageGatherExtended = true,
.shaderStorageImageMultisample = true, .shaderStorageImageMultisample = true,
.shaderClipDistance = features.shaderClipDistance, .shaderClipDistance = features.shaderClipDistance,
.shaderInt16 = true,
}, },
}, },
vk::PhysicalDeviceVulkan11Features{ vk::PhysicalDeviceVulkan11Features{
.shaderDrawParameters = true, .shaderDrawParameters = true,
}, },
vk::PhysicalDeviceVulkan12Features{ vk::PhysicalDeviceVulkan12Features{
.shaderFloat16 = true,
.scalarBlockLayout = true, .scalarBlockLayout = true,
.uniformBufferStandardLayout = true, .uniformBufferStandardLayout = true,
.hostQueryReset = true, .hostQueryReset = true,

View File

@ -251,11 +251,16 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
block_pool.ReleaseContents(); block_pool.ReleaseContents();
inst_pool.ReleaseContents(); inst_pool.ReleaseContents();
if (hash == 0x43ade46898f820e2 || hash == 0xbcf2be6c546ad35a) {
return nullptr;
}
// Recompile shader to IR. // Recompile shader to IR.
try { try {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash); LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
info.pgm_base = pgm->Address<uintptr_t>(); info.pgm_base = pgm->Address<uintptr_t>();
info.pgm_hash = hash;
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V // Compile IR to SPIR-V

View File

@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()}, liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool}, pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} { vertex_index_buffer{instance, scheduler, VertexIndexFlags, 3_GB, BufferType::Upload} {
if (!Config::nullGpu()) { if (!Config::nullGpu()) {
liverpool->BindRasterizer(this); liverpool->BindRasterizer(this);
} }
@ -174,11 +174,9 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
// Upload index data to stream buffer. // Upload index data to stream buffer.
const auto index_address = regs.index_base_address.Address<const void*>(); const auto index_address = regs.index_base_address.Address<const void*>();
const u32 index_buffer_size = regs.num_indices * index_size; const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size); const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
static constexpr std::array<u16, 4> test{};
std::memcpy(data, index_address, index_buffer_size); std::memcpy(data, index_address, index_buffer_size);
ASSERT(std::memcmp(data, test.data(), sizeof(test)) != 0);
vertex_index_buffer.Commit(index_buffer_size); vertex_index_buffer.Commit(index_buffer_size);
// Bind index buffer. // Bind index buffer.

View File

@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor]; auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound; wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick); //scheduler.Wait(watch.tick);
++wait_cursor; ++wait_cursor;
} }
} }

View File

@ -77,8 +77,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
if (usage_override) { if (usage_override) {
usage_ci.usage = usage_override.value(); usage_ci.usage = usage_override.value();
} }
if (info.format == vk::Format::eR32Sfloat) { if (image.info.type == vk::ImageType::e1D) {
printf("stop\n"); printf("bad\n");
} }
// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it. // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
vk::Format format = info.format; vk::Format format = info.format;

View File

@ -185,6 +185,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eB8G8R8A8Srgb: case vk::Format::eB8G8R8A8Srgb:
case vk::Format::eB8G8R8A8Unorm: case vk::Format::eB8G8R8A8Unorm:
case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eR32Sfloat:
case vk::Format::eR32Uint: case vk::Format::eR32Uint:
return vk::Format::eR32Uint; return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock: case vk::Format::eBc1RgbaUnormBlock: