Work

parent 414c7bd825
commit 58dcd6473d
@@ -28,7 +28,7 @@ constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
 // User area size is normally larger than this. However games are unlikely to map to high
 // regions of that area, so by default we allocate a smaller virtual address space (about 1/4th)
 // to save space on page tables.
-static constexpr size_t UserSize = 1ULL << 38;
+static constexpr size_t UserSize = 1ULL << 39;
 static constexpr size_t SystemSize = USER_MIN - SYSTEM_MANAGED_MIN;
 
 /**
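As a quick sanity check on the numbers (a standalone sketch, not part of the commit): the old reservation is 256 GiB and the new one is 512 GiB of user virtual address space.

#include <cstdio>

int main() {
    constexpr unsigned long long old_size = 1ULL << 38;
    constexpr unsigned long long new_size = 1ULL << 39;
    std::printf("old = %llu GiB\n", old_size >> 30); // 256
    std::printf("new = %llu GiB\n", new_size >> 30); // 512
}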
@@ -13,6 +13,7 @@ void MntPoints::Mount(const std::filesystem::path& host_folder, const std::strin
     MntPair pair;
     pair.host_path = host_folder.string();
+    std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
     pair.guest_path = guest_folder;
 
     m_mnt_pairs.push_back(pair);
@@ -40,17 +41,36 @@ std::string MntPoints::GetHostDirectory(const std::string& guest_directory) {
     return "";
 }
 
+std::string ToLower(std::string str) {
+    std::transform(str.begin(), str.end(), str.begin(),
+                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
+    return str;
+}
+
 std::string MntPoints::GetHostFile(const std::string& guest_file) {
     std::scoped_lock lock{m_mutex};
 
     for (auto& pair : m_mnt_pairs) {
-        // horrible code but it works :D
         int find = guest_file.find(pair.guest_path);
-        if (find == 0) {
-            std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
-            std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
-            return pair.host_path + npath;
+        if (find != 0) {
+            continue;
         }
+        std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
+        const auto host_path = pair.host_path + npath;
+#ifndef _WIN64
+        const std::filesystem::path path{host_path};
+        if (!std::filesystem::exists(path)) {
+            const auto filename = ToLower(path.filename());
+            for (const auto& file : std::filesystem::directory_iterator(path.parent_path())) {
+                const auto exist_filename = ToLower(file.path().filename());
+                if (filename == exist_filename) {
+                    return file.path();
+                }
+            }
+        }
+#endif
+        return host_path;
    }
    return "";
 }
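For readers following the new lookup, here is the same case-insensitive fallback idea as a standalone, POSIX-flavoured sketch; ResolveCaseInsensitive is an illustrative name, not a function in the codebase.

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <filesystem>
#include <string>

static std::string ToLower(std::string str) {
    std::transform(str.begin(), str.end(), str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return str;
}

// If `path` does not exist, scan its parent directory for an entry whose
// lowercased filename matches, mirroring the #ifndef _WIN64 branch above.
std::filesystem::path ResolveCaseInsensitive(const std::filesystem::path& path) {
    if (std::filesystem::exists(path)) {
        return path;
    }
    const auto wanted = ToLower(path.filename().string());
    for (const auto& entry : std::filesystem::directory_iterator(path.parent_path())) {
        if (ToLower(entry.path().filename().string()) == wanted) {
            return entry.path();
        }
    }
    return path; // let the caller's open() fail with the original spelling
}

int main() {
    std::printf("%s\n", ResolveCaseInsensitive("./ReadMe.md").c_str());
}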
@@ -344,8 +344,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
 }
 
 int PS4_SYSV_ABI sceGnmAreSubmitsAllowed() {
-    LOG_TRACE(Lib_GnmDriver, "called");
-    return submission_lock == 0;
+    LOG_TRACE(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
+    return liverpool->IsGpuIdle();
 }
 
 int PS4_SYSV_ABI sceGnmBeginWorkload() {

@@ -803,9 +803,9 @@ int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
-    LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
-    return ORBIS_OK;
+bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
+    LOG_TRACE(Lib_GnmDriver, "called");
+    return false;
 }
 
 int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface() {

@@ -1930,8 +1930,10 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
         }
     }
 
+    LOG_INFO(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
     if (submission_lock != 0) {
         liverpool->WaitGpuIdle();
+        LOG_INFO(Lib_GnmDriver, "Done waiting for GPU");
 
         // Suspend logic goes here
@@ -63,7 +63,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
 u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size);
 u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size);
 int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
-int PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
+bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
 int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface();
 int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuDebugger();
 int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuException();
@@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
     file->m_host_name = mnt->GetHostFile(file->m_guest_name);
     if (read) {
         file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read);
-    } else if (write && create && truncate) {
+    } else if (write && create) {
         file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write);
     } else if (write && create && append) { // CUSA04729 (appends app0/shaderlist.txt)
         file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append);

@@ -90,7 +90,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
         }
         if (!file->f.IsOpen()) {
             h->DeleteHandle(handle);
-            return SCE_KERNEL_ERROR_EACCES;
+            return SCE_KERNEL_ERROR_ENOENT;
         }
     }
     file->is_opened = true;
@@ -110,10 +110,13 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i
         }
     }
 
-    const VAddr in_addr = reinterpret_cast<VAddr>(*addr);
+    VAddr in_addr = reinterpret_cast<VAddr>(*addr);
     const auto mem_prot = static_cast<Core::MemoryProt>(prot);
     const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
     auto* memory = Core::Memory::Instance();
+    if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
+        // in_addr = 0x880000000;
+    }
     return memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "",
                              false, directMemoryStart, alignment);
 }

@@ -143,10 +146,13 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
         return ORBIS_KERNEL_ERROR_EFAULT;
     }
 
-    const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
+    VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
     const auto mem_prot = static_cast<Core::MemoryProt>(prot);
     const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
     auto* memory = Core::Memory::Instance();
+    if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
+        // in_addr = 0x880000000;
+    }
     const int ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
                                       Core::VMAType::Flexible, name);

@@ -161,7 +167,6 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
 }
 
 int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
-    LOG_WARNING(Kernel_Vmm, "called");
     auto* memory = Core::Memory::Instance();
     return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
 }
@@ -5,6 +5,7 @@
 #include <thread>
 #include <semaphore.h>
 #include "common/assert.h"
+#include "common/alignment.h"
 #include "common/error.h"
 #include "common/logging/log.h"
 #include "common/singleton.h"

@@ -16,6 +17,8 @@
 #include "core/linker.h"
 #ifdef _WIN64
 #include <windows.h>
+#else
+#include <sys/mman.h>
 #endif
 
 namespace Libraries::Kernel {
@@ -46,7 +49,8 @@ void init_pthreads() {
 }
 
 void pthreadInitSelfMainThread() {
-    g_pthread_self = new PthreadInternal{};
+    auto* pthread_pool = g_pthread_cxt->GetPthreadPool();
+    g_pthread_self = pthread_pool->Create();
     scePthreadAttrInit(&g_pthread_self->attr);
     g_pthread_self->pth = pthread_self();
     g_pthread_self->name = "Main_Thread";

@@ -978,7 +982,14 @@ ScePthread PThreadPool::Create() {
         }
     }
 
+#ifdef _WIN64
     auto* ret = new PthreadInternal{};
+#else
+    static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
+    auto* ret = reinterpret_cast<PthreadInternal*>(
+        mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
+             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
+    hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
+#endif
 
     ret->is_free = false;
     ret->is_detached = false;
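The non-Windows branch is a bump-the-hint allocator: each pthread object lands at a predictable low address so it stays inside the guest-visible range. A minimal POSIX-only sketch of that pattern follows; 0x7FFFFC000 is taken from the diff, and MAP_FIXED silently replaces anything already mapped there, which is tolerable only in a sketch.

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Map `size` bytes at the current hint, then bump the hint by a page-aligned
// amount so the next call gets the following slot.
void* AllocBelowHint(std::size_t size) {
    static std::uint8_t* hint = reinterpret_cast<std::uint8_t*>(0x7FFFFC000ULL);
    void* ptr = mmap(hint, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (ptr == MAP_FAILED) {
        return nullptr;
    }
    const std::size_t page = 4096;
    hint += (size + page - 1) & ~(page - 1); // AlignUp(size, 4_KB)
    return ptr;
}

int main() {
    std::printf("%p %p\n", AllocBelowHint(64), AllocBelowHint(64));
}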
@@ -42,7 +42,7 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> {
 template <StringLiteral name, class F, F f>
 constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
 
-// #define W(foo) wrapper<#foo, decltype(&foo), foo>
+//#define W(foo) wrapper<#foo, decltype(&foo), foo>
 #define W(foo) foo
 
 #define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \
@@ -10,6 +10,7 @@
 #include <arpa/inet.h>
 #endif
 
+#include <thread>
 #include <common/assert.h>
 #include "common/logging/log.h"
 #include "core/libraries/error_codes.h"

@@ -59,6 +60,7 @@ int PS4_SYSV_ABI sce_net_in6addr_nodelocal_allnodes() {
 }
 
 OrbisNetId PS4_SYSV_ABI sceNetAccept(OrbisNetId s, OrbisNetSockaddr* addr, u32* paddrlen) {
+    std::this_thread::sleep_for(std::chrono::seconds(60));
     LOG_ERROR(Lib_Net, "(STUBBED) called");
     return ORBIS_OK;
 }

@@ -559,7 +561,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
 }
 
 int PS4_SYSV_ABI sceNetEpollWait() {
-    LOG_ERROR(Lib_Net, "(STUBBED) called");
+    //LOG_ERROR(Lib_Net, "(STUBBED) called");
     return ORBIS_OK;
 }
@@ -341,6 +341,7 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
     switch (mount_mode) {
     case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
     case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
+    case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
     case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: {
         if (!std::filesystem::exists(mount_dir)) {
             return ORBIS_SAVE_DATA_ERROR_NOT_FOUND;

@@ -349,11 +350,13 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
         mnt->Mount(mount_dir, g_mount_point);
 
         mount_result->mount_status = 0;
-        strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
-    } break;
+        std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
+        break;
+    }
     case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE:
     case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
     case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
+    case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
+        ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
     case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
         ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON:
     case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF |
@@ -137,8 +137,9 @@ void Emulator::Run(const std::filesystem::path& file) {
 }
 
 void Emulator::LoadSystemModules(const std::filesystem::path& file) {
-    constexpr std::array<SysModules, 6> ModulesToLoad{
+    constexpr std::array<SysModules, 7> ModulesToLoad{
         {{"libSceNgs2.sprx", nullptr},
+         {"libSceFiber.sprx", nullptr},
          {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal},
          {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
          {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
@@ -176,6 +176,10 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
 void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
     const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
     spv::ExecutionModel execution_model{};
     ctx.AddCapability(spv::Capability::Image1D);
     ctx.AddCapability(spv::Capability::Sampled1D);
+    ctx.AddCapability(spv::Capability::Float16);
+    ctx.AddCapability(spv::Capability::Int16);
+    ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
+    ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
     switch (program.info.stage) {
@@ -18,8 +18,8 @@ void EmitBitCastU64F64(EmitContext&) {
     UNREACHABLE_MSG("SPIR-V Instruction");
 }
 
-void EmitBitCastF16U16(EmitContext&) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+Id EmitBitCastF16U16(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.F16[1], value);
 }
 
 Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
@@ -60,7 +60,11 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
     case IR::Attribute::RenderTarget0:
     case IR::Attribute::RenderTarget1:
     case IR::Attribute::RenderTarget2:
-    case IR::Attribute::RenderTarget3: {
+    case IR::Attribute::RenderTarget3:
+    case IR::Attribute::RenderTarget4:
+    case IR::Attribute::RenderTarget5:
+    case IR::Attribute::RenderTarget6:
+    case IR::Attribute::RenderTarget7: {
         const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
         if (ctx.frag_num_comp[index] > 1) {
             return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
@@ -196,7 +200,15 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 }
 
 Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    UNREACHABLE();
+    const auto info = inst->Flags<IR::BufferInstInfo>();
+    const auto& buffer = ctx.buffers[handle];
+    boost::container::static_vector<Id, 2> ids;
+    for (u32 i = 0; i < 2; i++) {
+        const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
+        const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
+        ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
+    }
+    return ctx.OpCompositeConstruct(buffer.data_types->Get(2), ids);
 }
 
 Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
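A host-side analog of what the emitted SPIR-V does at runtime, two consecutive 32-bit loads packed into a two-component vector, may help when reading the hunk above; this is an illustrative sketch, not emulator code.

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Read two consecutive 32-bit elements at `address` (counted in elements, not
// bytes) and reinterpret them as floats, like the two OpLoads followed by
// OpCompositeConstruct above. The caller guarantees bounds.
std::array<float, 2> LoadBufferF32x2(const std::vector<std::uint32_t>& buffer,
                                     std::uint32_t address) {
    std::array<float, 2> out{};
    for (std::uint32_t i = 0; i < 2; ++i) {
        std::memcpy(&out[i], &buffer[address + i], sizeof(float));
    }
    return out;
}

int main() {
    std::vector<std::uint32_t> buf(4);
    const float one = 1.0f, two = 2.0f;
    std::memcpy(&buf[2], &one, sizeof(float));
    std::memcpy(&buf[3], &two, sizeof(float));
    const auto v = LoadBufferF32x2(buf, 2);
    std::printf("%g %g\n", v[0], v[1]); // 1 2
}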
@@ -255,4 +255,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value) {
     return ctx.OpConvertUToF(ctx.F64[1], value);
 }
 
+Id EmitConvertU16U32(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U16, value);
+}
+
 } // namespace Shader::Backend::SPIRV
@@ -6,6 +6,11 @@
 
 namespace Shader::Backend::SPIRV {
 
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
+    ctx.Decorate(op, spv::Decoration::NoContraction);
+    return op;
+}
+
 Id EmitFPAbs16(EmitContext& ctx, Id value) {
     return ctx.OpFAbs(ctx.F16[1], value);
 }

@@ -19,31 +24,31 @@ Id EmitFPAbs64(EmitContext& ctx, Id value) {
 }
 
 Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    return ctx.OpFAdd(ctx.F16[1], a, b);
+    return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
 }
 
 Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    return ctx.OpFAdd(ctx.F32[1], a, b);
+    return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
 }
 
 Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    return ctx.OpFAdd(ctx.F64[1], a, b);
+    return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
 }
 
 Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    return ctx.OpFSub(ctx.F32[1], a, b);
+    return Decorate(ctx, inst, ctx.OpFSub(ctx.F32[1], a, b));
 }
 
 Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
-    return ctx.OpFma(ctx.F16[1], a, b, c);
+    return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
 }
 
 Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
-    return ctx.OpFma(ctx.F32[1], a, b, c);
+    return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
 }
 
 Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
-    return ctx.OpFma(ctx.F64[1], a, b, c);
+    return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
 }
 
 Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
@@ -63,15 +68,15 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
 }
 
 Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    return ctx.OpFMul(ctx.F16[1], a, b);
+    return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
 }
 
 Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    return ctx.OpFMul(ctx.F32[1], a, b);
+    return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
 }
 
 Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    return ctx.OpFMul(ctx.F64[1], a, b);
+    return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
 }
 
 Id EmitFPNeg16(EmitContext& ctx, Id value) {
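The point of routing every arithmetic result through Decorate is that NoContraction stops the downstream compiler from fusing a multiply and add into one fma, which changes rounding. A standalone host C++ demonstration of the difference (build with -ffp-contract=off so the host compiler does not fuse the line itself):

#include <cmath>
#include <cstdio>

int main() {
    const float a = 1.0f + 0x1p-23f; // smallest float above 1
    const float b = 1.0f - 0x1p-23f;
    const float c = -1.0f;
    const float separate = a * b + c;       // two roundings: a*b rounds up to 1.0f, result 0
    const float fused = std::fmaf(a, b, c); // one rounding: keeps the exact -0x1p-46
    std::printf("separate=%a fused=%a\n", separate, fused);
}

Shaders tuned against GCN's unfused behavior can visibly diverge when the host silently contracts, which is presumably why every FP op here pins the decoration.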
@@ -141,7 +141,7 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
     const auto& texture = ctx.images[handle & 0xFFFF];
     const Id image = ctx.OpLoad(texture.image_type, texture.id);
     const Id color_type = texture.data_types->Get(4);
-    ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), ctx.OpBitcast(color_type, color));
+    ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
 }
 
 } // namespace Shader::Backend::SPIRV
@@ -146,7 +146,7 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 void EmitBitCastU16F16(EmitContext& ctx);
 Id EmitBitCastU32F32(EmitContext& ctx, Id value);
 void EmitBitCastU64F64(EmitContext& ctx);
-void EmitBitCastF16U16(EmitContext&);
+Id EmitBitCastF16U16(EmitContext& ctx, Id value);
 Id EmitBitCastF32U32(EmitContext& ctx, Id value);
 void EmitBitCastF64U64(EmitContext& ctx);
 Id EmitPackUint2x32(EmitContext& ctx, Id value);

@@ -343,6 +343,7 @@ Id EmitConvertF64U8(EmitContext& ctx, Id value);
 Id EmitConvertF64U16(EmitContext& ctx, Id value);
 Id EmitConvertF64U32(EmitContext& ctx, Id value);
 Id EmitConvertF64U64(EmitContext& ctx, Id value);
+Id EmitConvertU16U32(EmitContext& ctx, Id value);
 
 Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
                               Id offset);
@@ -74,19 +74,19 @@ Id EmitContext::Def(const IR::Value& value) {
 void EmitContext::DefineArithmeticTypes() {
     void_id = Name(TypeVoid(), "void_id");
     U1[1] = Name(TypeBool(), "bool_id");
-    // F16[1] = Name(TypeFloat(16), "f16_id");
+    F16[1] = Name(TypeFloat(16), "f16_id");
     F32[1] = Name(TypeFloat(32), "f32_id");
     // F64[1] = Name(TypeFloat(64), "f64_id");
     S32[1] = Name(TypeSInt(32), "i32_id");
     U32[1] = Name(TypeUInt(32), "u32_id");
     // U8 = Name(TypeSInt(8), "u8");
     // S8 = Name(TypeUInt(8), "s8");
-    // U16 = Name(TypeUInt(16), "u16_id");
+    U16 = Name(TypeUInt(16), "u16_id");
     // S16 = Name(TypeSInt(16), "s16_id");
     // U64 = Name(TypeUInt(64), "u64_id");
 
     for (u32 i = 2; i <= 4; i++) {
-        // F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
+        F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
         F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
         // F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
         S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));
@@ -32,7 +32,7 @@ namespace Shader::Gcn {
  * We take the reverse way, extract the original input semantics from these instructions.
  **/
 
-std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
+std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
     std::vector<VertexAttribute> attributes;
     GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
     GcnDecodeContext decoder;

@@ -47,6 +47,8 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
     u32 semantic_index = 0;
     while (!code_slice.atEnd()) {
         const auto inst = decoder.decodeInstruction(code_slice);
+        *out_size += inst.length;
+
         if (inst.opcode == Opcode::S_SETPC_B64) {
             break;
         }

@@ -17,6 +17,6 @@ struct VertexAttribute {
     u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
 };
 
-std::vector<VertexAttribute> ParseFetchShader(const u32* code);
+std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
 
 } // namespace Shader::Gcn
@@ -5,20 +5,29 @@
 
 namespace Shader::Gcn {
 
+static constexpr u32 SQ_SRC_LITERAL = 0xFF;
+
 void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
     const auto& smrd = inst.control.smrd;
-    ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
+    const u32 dword_offset = [&] -> u32 {
+        if (smrd.imm) {
+            return smrd.offset;
+        }
+        if (smrd.offset == SQ_SRC_LITERAL) {
+            return inst.src[1].code;
+        }
+        UNREACHABLE();
+    }();
     const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::Value base =
         ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
     IR::ScalarReg dst_reg{inst.dst[0].code};
     for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
+        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
     }
 }
 
 void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
-    static constexpr u32 SQ_SRC_LITERAL = 0xFF;
     const auto& smrd = inst.control.smrd;
     const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::U32 dword_offset = [&] -> IR::U32 {
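The offset-selection lambda reads cleanly in isolation: an immediate SMRD encodes its offset in the instruction itself, while the 0xFF "literal" encoding takes it from the trailing literal dword. A minimal sketch with illustrative names, and a thrown error standing in for UNREACHABLE():

#include <cstdint>
#include <cstdio>
#include <stdexcept>

constexpr std::uint32_t SQ_SRC_LITERAL = 0xFF; // value taken from the diff

// Mirrors the dword_offset lambda above.
std::uint32_t DwordOffset(bool imm, std::uint32_t offset_field, std::uint32_t literal) {
    if (imm) {
        return offset_field; // offset encoded directly in the instruction
    }
    if (offset_field == SQ_SRC_LITERAL) {
        return literal;      // offset supplied as a trailing literal constant
    }
    throw std::runtime_error("SGPR-relative SMRD offset not handled");
}

int main() {
    std::printf("%u %u\n", DwordOffset(true, 4, 0), DwordOffset(false, SQ_SRC_LITERAL, 16));
}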
@@ -1,6 +1,9 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/config.h"
+#include "common/io_file.h"
+#include "common/path_util.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/frontend/translate/translate.h"

@@ -190,7 +193,20 @@ void Translator::EmitFetch(const GcnInst& inst) {
     std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
 
     // Parse the assembly to generate a list of attributes.
-    const auto attribs = ParseFetchShader(code);
+    u32 fetch_size{};
+    const auto attribs = ParseFetchShader(code, &fetch_size);
+
+    if (Config::dumpShaders()) {
+        using namespace Common::FS;
+        const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
+        if (!std::filesystem::exists(dump_dir)) {
+            std::filesystem::create_directories(dump_dir);
+        }
+        const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
+        const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
+        file.WriteRaw<u8>(code, fetch_size);
+    }
 
     for (const auto& attrib : attribs) {
         const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
         IR::VectorReg dst_reg{attrib.dest_vgpr};
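The new out_size parameter works because each decoded instruction reports its encoded length, so the parser can accumulate the fetch shader's total size up to and including the terminating S_SETPC_B64. A simplified, self-contained model of that accumulation; the record type and the opcode constant here are placeholders, not the decoder's real values:

#include <cstdint>
#include <cstdio>
#include <vector>

struct DecodedInst {
    std::uint32_t opcode;
    std::uint32_t length; // encoded size of this instruction
};

constexpr std::uint32_t kSetPcB64 = 0xDEAD; // stand-in, not the real opcode value

// Sum instruction lengths up to and including the terminator, the same way
// *out_size is filled in while parsing.
std::uint32_t MeasureFetchShader(const std::vector<DecodedInst>& insts) {
    std::uint32_t size = 0;
    for (const auto& inst : insts) {
        size += inst.length;
        if (inst.opcode == kSetPcB64) {
            break;
        }
    }
    return size;
}

int main() {
    std::printf("%u\n", MeasureFetchShader({{1, 8}, {2, 4}, {kSetPcB64, 4}})); // 16
}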
@@ -242,6 +258,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
     }
     Translator translator{block, info};
     for (const auto& inst : inst_list) {
+        block_base += inst.length;
         switch (inst.opcode) {
         case Opcode::S_MOVK_I32:
             translator.S_MOVK(inst);

@@ -378,6 +395,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
         case Opcode::IMAGE_SAMPLE:
         case Opcode::IMAGE_SAMPLE_L:
         case Opcode::IMAGE_SAMPLE_C_O:
+        case Opcode::IMAGE_SAMPLE_B:
             translator.IMAGE_SAMPLE(inst);
             break;
         case Opcode::IMAGE_ATOMIC_ADD:

@@ -527,6 +545,12 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
         case Opcode::V_CNDMASK_B32:
             translator.V_CNDMASK_B32(inst);
             break;
+        case Opcode::TBUFFER_LOAD_FORMAT_X:
+            translator.BUFFER_LOAD_FORMAT(1, true, inst);
+            break;
+        case Opcode::TBUFFER_LOAD_FORMAT_XY:
+            translator.BUFFER_LOAD_FORMAT(2, true, inst);
+            break;
         case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
             translator.BUFFER_LOAD_FORMAT(3, true, inst);
             break;

@@ -901,7 +925,6 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
                       magic_enum::enum_name(inst.opcode), opcode);
             info.translation_failed = true;
         }
-        block_base += inst.length;
     }
 }
@@ -28,7 +28,8 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
 
 void Translator::V_CVT_F32_F16(const GcnInst& inst) {
     const IR::U32 src0 = GetSrc(inst.src[0]);
-    SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0));
+    const IR::U16 src0l = ir.UConvert(16, src0);
+    SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
 }
 
 void Translator::V_MUL_F32(const GcnInst& inst) {
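The rewritten V_CVT_F32_F16 now takes the low 16 bits of the source, reinterprets them as an IEEE half, and widens to f32, rather than treating the value as an integer. A standalone decoder showing those semantics; the bit manipulation is spelled out manually since this sketch avoids compiler-specific half types:

#include <cstdint>
#include <cstdio>
#include <cstring>

float HalfToFloat(std::uint16_t h) {
    const std::uint32_t sign = std::uint32_t(h >> 15) << 31;
    const std::uint32_t exp = (h >> 10) & 0x1F;
    std::uint32_t mant = h & 0x3FF;
    std::uint32_t bits;
    if (exp == 0) {
        if (mant == 0) {
            bits = sign; // signed zero
        } else {
            // Normalize a subnormal half into a normal float.
            int shift = 0;
            while ((mant & 0x400) == 0) {
                mant <<= 1;
                ++shift;
            }
            mant &= 0x3FF;
            bits = sign | ((127 - 15 + 1 - shift) << 23) | (mant << 13);
        }
    } else if (exp == 0x1F) {
        bits = sign | 0x7F800000 | (mant << 13); // Inf / NaN
    } else {
        bits = sign | ((exp - 15 + 127) << 23) | (mant << 13);
    }
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

int main() {
    std::printf("%f\n", HalfToFloat(0x3C00)); // 1.0
    std::printf("%f\n", HalfToFloat(0xC000)); // -2.0
}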
@@ -519,7 +520,7 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
 void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
     const IR::F32 src0{GetSrc(inst.src[0], true)};
     const IR::U32 src1{GetSrc(inst.src[1])};
-    ir.SetVcc(ir.Imm1(false));
+    ir.SetVcc(ir.FPIsInf(src0));
     // TODO
 }
@@ -1194,6 +1194,13 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
 }
 
+U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
+    switch (result_bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<U16>(Opcode::ConvertU16U32, value);
+        }
+    }
+    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
@@ -283,6 +283,7 @@ OPCODE(ConvertF32U32, F32, U32, )
 OPCODE(ConvertF64S32, F64, U32, )
 OPCODE(ConvertF64U32, F64, U32, )
 OPCODE(ConvertF32U16, F32, U16, )
+OPCODE(ConvertU16U32, U16, U32, )
 
 // Image operations
 OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
@@ -234,7 +234,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, Am
 **/
     IR::Inst* handle = inst.Arg(0).InstRecursive();
     IR::Inst* p0 = handle->Arg(0).InstRecursive();
-    if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate()) {
+    if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() || !p0->Arg(1).IsImmediate()) {
         return -1;
     }
     IR::Inst* p1 = handle->Arg(1).InstRecursive();

@@ -286,7 +286,9 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     if (inst_info.is_typed) {
         ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
                (inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
-                inst_info.dmft == AmdGpu::DataFormat::Format32_32_32));
+                inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
+                inst_info.dmft == AmdGpu::DataFormat::Format32));
     }
     if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
         inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
@@ -165,6 +165,7 @@ struct Info {
     Stage stage;
 
     uintptr_t pgm_base{};
+    u64 pgm_hash{};
     u32 shared_memory_size{};
     bool uses_group_quad{};
     bool uses_shared_u8{};
@@ -479,7 +479,7 @@ struct Liverpool {
 
     template <typename T = VAddr>
     T Address() const {
-        return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
+        return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
    }
 };
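The masking change assembles the 64-bit address from the split registers while clearing bit 0 of base_addr_lo, which presumably carries a flag rather than address bits (an assumption read off the diff, not confirmed by it). In isolation:

#include <cstdint>
#include <cstdio>

// Join the split PM4 address registers; bit 0 of the low half is masked off
// before the value is used as a pointer.
std::uint64_t IndexBaseAddress(std::uint32_t base_addr_lo, std::uint32_t base_addr_hi) {
    return (base_addr_lo & ~1ULL) | (std::uint64_t(base_addr_hi) << 32);
}

int main() {
    const unsigned long long addr = IndexBaseAddress(0x80001001u, 0x1Fu);
    std::printf("%#llx\n", addr); // 0x1f80001000
}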
@@ -329,6 +329,10 @@ struct PM4CmdEventWriteEop {
             *Address<u64>() = Common::FencedRDTSC();
             break;
         }
+        case DataSelect::GpuClock64: {
+            *Address<u64>() = 0;
+            break;
+        }
         default: {
             UNREACHABLE();
         }

@@ -549,8 +553,8 @@ struct PM4DumpConstRam {
     u32 addr_hi;
 
     template <typename T>
-    T* Address() const {
-        return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo);
+    T Address() const {
+        return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
     }
 
     [[nodiscard]] u32 Offset() const {
@@ -63,7 +63,7 @@ struct Buffer {
         if (stride == 0) {
             return 1U;
         }
-        ASSERT(stride % element_size == 0);
+        //ASSERT(stride % element_size == 0);
         return stride / element_size;
     }
@@ -404,6 +404,26 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
         num_format == AmdGpu::NumberFormat::Unorm) {
         return vk::Format::eR4G4B4A4UnormPack16;
     }
+    if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
+        num_format == AmdGpu::NumberFormat::Uint) {
+        return vk::Format::eR16G16B16A16Uint;
+    }
+    if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
+        num_format == AmdGpu::NumberFormat::Uint) {
+        return vk::Format::eR32G32B32A32Uint;
+    }
+    if (data_format == AmdGpu::DataFormat::Format8 &&
+        num_format == AmdGpu::NumberFormat::Sint) {
+        return vk::Format::eR8Sint;
+    }
+    if (data_format == AmdGpu::DataFormat::FormatBc1 &&
+        num_format == AmdGpu::NumberFormat::Srgb) {
+        return vk::Format::eBc1RgbaSrgbBlock;
+    }
+    if (data_format == AmdGpu::DataFormat::Format16_16 &&
+        num_format == AmdGpu::NumberFormat::Sint) {
+        return vk::Format::eR16G16Sint;
+    }
     UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
 }
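The growing if-chain could equally be data-driven, where each new pair becomes a one-line table entry. A hedged sketch of that alternative with stand-in enums (not the emulator's AmdGpu or vk types):

#include <cstdio>
#include <optional>

enum class DataFmt { Fmt16_16, Fmt32_32_32_32 };
enum class NumFmt { Sint, Uint };
enum class HostFmt { R16G16Sint, R32G32B32A32Uint };

struct Entry {
    DataFmt data;
    NumFmt num;
    HostFmt out;
};

// One line per supported (data format, number format) combination.
constexpr Entry kFormatTable[] = {
    {DataFmt::Fmt16_16, NumFmt::Sint, HostFmt::R16G16Sint},
    {DataFmt::Fmt32_32_32_32, NumFmt::Uint, HostFmt::R32G32B32A32Uint},
};

std::optional<HostFmt> Lookup(DataFmt data, NumFmt num) {
    for (const auto& e : kFormatTable) {
        if (e.data == data && e.num == num) {
            return e.out;
        }
    }
    return std::nullopt; // caller decides whether this is unreachable
}

int main() {
    std::printf("%d\n", Lookup(DataFmt::Fmt16_16, NumFmt::Sint).has_value()); // 1
}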
@@ -209,12 +209,14 @@ bool Instance::CreateDevice() {
             .shaderImageGatherExtended = true,
             .shaderStorageImageMultisample = true,
             .shaderClipDistance = features.shaderClipDistance,
+            .shaderInt16 = true,
         },
     },
     vk::PhysicalDeviceVulkan11Features{
         .shaderDrawParameters = true,
     },
     vk::PhysicalDeviceVulkan12Features{
+        .shaderFloat16 = true,
         .scalarBlockLayout = true,
         .uniformBufferStandardLayout = true,
         .hostQueryReset = true,

@@ -251,11 +251,16 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         block_pool.ReleaseContents();
         inst_pool.ReleaseContents();
 
+        if (hash == 0x43ade46898f820e2 || hash == 0xbcf2be6c546ad35a) {
+            return nullptr;
+        }
+
         // Recompile shader to IR.
         try {
             LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
             Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
+            info.pgm_base = pgm->Address<uintptr_t>();
+            info.pgm_hash = hash;
             programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
 
             // Compile IR to SPIR-V
@@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
     : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
       liverpool{liverpool_}, memory{Core::Memory::Instance()},
       pipeline_cache{instance, scheduler, liverpool},
-      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
+      vertex_index_buffer{instance, scheduler, VertexIndexFlags, 3_GB, BufferType::Upload} {
     if (!Config::nullGpu()) {
         liverpool->BindRasterizer(this);
     }

@@ -174,11 +174,9 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
 
     // Upload index data to stream buffer.
     const auto index_address = regs.index_base_address.Address<const void*>();
-    const u32 index_buffer_size = regs.num_indices * index_size;
+    const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
     const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
-    static constexpr std::array<u16, 4> test{};
     std::memcpy(data, index_address, index_buffer_size);
-    ASSERT(std::memcmp(data, test.data(), sizeof(test)) != 0);
     vertex_index_buffer.Commit(index_buffer_size);
 
     // Bind index buffer.
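The size fix matters because the draw reads indices starting at index_offset; uploading only num_indices * index_size bytes would cut off the tail of the range the GPU actually reads. As a pure function (illustrative, with assumed u32 types):

#include <cstdint>
#include <cstdio>

// Bytes that must be uploaded so the GPU can read indices
// [index_offset, index_offset + num_indices) from the start of the buffer.
std::uint32_t IndexUploadSize(std::uint32_t index_offset, std::uint32_t num_indices,
                              std::uint32_t index_size) {
    return (index_offset + num_indices) * index_size;
}

int main() {
    // 100 16-bit indices drawn starting at index 50: 300 bytes, not 200.
    std::printf("%u\n", IndexUploadSize(50, 100, 2));
}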
@@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
     while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
         auto& watch = previous_watches[wait_cursor];
         wait_bound = watch.upper_bound;
-        scheduler.Wait(watch.tick);
+        //scheduler.Wait(watch.tick);
         ++wait_cursor;
     }
 }

@@ -77,8 +77,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
     if (usage_override) {
         usage_ci.usage = usage_override.value();
     }
+    if (info.format == vk::Format::eR32Sfloat) {
+        printf("stop\n");
+        if (image.info.type == vk::ImageType::e1D) {
+            printf("bad\n");
+        }
+    }
     // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
     vk::Format format = info.format;
@@ -185,6 +185,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     case vk::Format::eB8G8R8A8Srgb:
     case vk::Format::eB8G8R8A8Unorm:
     case vk::Format::eR8G8B8A8Unorm:
+    case vk::Format::eR32Sfloat:
     case vk::Format::eR32Uint:
         return vk::Format::eR32Uint;
     case vk::Format::eBc1RgbaUnormBlock: