This commit is contained in:
IndecisiveTurtle 2024-07-04 15:32:01 +03:00
parent 414c7bd825
commit 58dcd6473d
38 changed files with 209 additions and 63 deletions

View File

@ -28,7 +28,7 @@ constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
// User area size is normally larger than this. However, games are unlikely to map to high
// regions of that area, so by default we allocate a smaller virtual address space (about 1/4th)
// to save space on page tables.
static constexpr size_t UserSize = 1ULL << 38;
static constexpr size_t UserSize = 1ULL << 39;
static constexpr size_t SystemSize = USER_MIN - SYSTEM_MANAGED_MIN;
/**

View File

@ -13,6 +13,7 @@ void MntPoints::Mount(const std::filesystem::path& host_folder, const std::strin
MntPair pair;
pair.host_path = host_folder.string();
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
pair.guest_path = guest_folder;
m_mnt_pairs.push_back(pair);
@ -40,17 +41,36 @@ std::string MntPoints::GetHostDirectory(const std::string& guest_directory) {
return "";
}
/// Returns a copy of `str` in which every byte has been passed through std::tolower.
std::string ToLower(std::string str) {
    for (char& ch : str) {
        // Route through unsigned char so std::tolower never receives a negative value.
        const auto as_unsigned = static_cast<unsigned char>(ch);
        ch = static_cast<char>(std::tolower(as_unsigned));
    }
    return str;
}
std::string MntPoints::GetHostFile(const std::string& guest_file) {
std::scoped_lock lock{m_mutex};
for (auto& pair : m_mnt_pairs) {
// horrible code but it works :D
int find = guest_file.find(pair.guest_path);
if (find == 0) {
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
std::replace(pair.host_path.begin(), pair.host_path.end(), '\\', '/');
return pair.host_path + npath;
if (find != 0) {
continue;
}
std::string npath = guest_file.substr(pair.guest_path.size(), guest_file.size() - 1);
const auto host_path = pair.host_path + npath;
#ifndef _WIN64
const std::filesystem::path path{host_path};
if (!std::filesystem::exists(path)) {
const auto filename = ToLower(path.filename());
for (const auto& file : std::filesystem::directory_iterator(path.parent_path())) {
const auto exist_filename = ToLower(file.path().filename());
if (filename == exist_filename) {
return file.path();
}
}
}
#endif
return host_path;
}
return "";
}

View File

@ -344,8 +344,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
}
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed() {
LOG_TRACE(Lib_GnmDriver, "called");
return submission_lock == 0;
LOG_TRACE(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
return liverpool->IsGpuIdle();
}
int PS4_SYSV_ABI sceGnmBeginWorkload() {
@ -803,9 +803,9 @@ int PS4_SYSV_ABI sceGnmDrawOpaqueAuto() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress() {
LOG_TRACE(Lib_GnmDriver, "called");
return false;
}
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface() {
@ -1930,8 +1930,10 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
}
}
LOG_INFO(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
LOG_INFO(Lib_GnmDriver, "Done waiting for GPU");
// Suspend logic goes here

View File

@ -63,7 +63,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size);
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size);
u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size);
int PS4_SYSV_ABI sceGnmDrawOpaqueAuto();
int PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
bool PS4_SYSV_ABI sceGnmDriverCaptureInProgress();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterface();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuDebugger();
int PS4_SYSV_ABI sceGnmDriverInternalRetrieveGnmInterfaceForGpuException();

View File

@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
file->m_host_name = mnt->GetHostFile(file->m_guest_name);
if (read) {
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read);
} else if (write && create && truncate) {
} else if (write && create) {
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write);
} else if (write && create && append) { // CUSA04729 (appends app0/shaderlist.txt)
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append);
@ -90,7 +90,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
}
if (!file->f.IsOpen()) {
h->DeleteHandle(handle);
return SCE_KERNEL_ERROR_EACCES;
return SCE_KERNEL_ERROR_ENOENT;
}
}
file->is_opened = true;

View File

@ -110,10 +110,13 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i
}
}
const VAddr in_addr = reinterpret_cast<VAddr>(*addr);
VAddr in_addr = reinterpret_cast<VAddr>(*addr);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
auto* memory = Core::Memory::Instance();
if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
//in_addr = 0x880000000;
}
return memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "",
false, directMemoryStart, alignment);
}
@ -143,10 +146,13 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
return ORBIS_KERNEL_ERROR_EFAULT;
}
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
auto* memory = Core::Memory::Instance();
if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
//in_addr = 0x880000000;
}
const int ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
Core::VMAType::Flexible, name);
@ -161,7 +167,6 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
}
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
LOG_WARNING(Kernel_Vmm, "called");
auto* memory = Core::Memory::Instance();
return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
}

View File

@ -5,6 +5,7 @@
#include <thread>
#include <semaphore.h>
#include "common/assert.h"
#include "common/alignment.h"
#include "common/error.h"
#include "common/logging/log.h"
#include "common/singleton.h"
@ -16,6 +17,8 @@
#include "core/linker.h"
#ifdef _WIN64
#include <windows.h>
#else
#include <sys/mman.h>
#endif
namespace Libraries::Kernel {
@ -46,7 +49,8 @@ void init_pthreads() {
}
void pthreadInitSelfMainThread() {
g_pthread_self = new PthreadInternal{};
auto* pthread_pool = g_pthread_cxt->GetPthreadPool();
g_pthread_self = pthread_pool->Create();
scePthreadAttrInit(&g_pthread_self->attr);
g_pthread_self->pth = pthread_self();
g_pthread_self->name = "Main_Thread";
@ -978,7 +982,14 @@ ScePthread PThreadPool::Create() {
}
}
#ifndef _WIN64
auto* ret = new PthreadInternal{};
#else
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
auto* ret = reinterpret_cast<PthreadInternal*>(mmap(hint_address, sizeof(PthreadInternal),
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
#endif
ret->is_free = false;
ret->is_detached = false;

View File

@ -42,7 +42,7 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> {
template <StringLiteral name, class F, F f>
constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
// #define W(foo) wrapper<#foo, decltype(&foo), foo>
//#define W(foo) wrapper<#foo, decltype(&foo), foo>
#define W(foo) foo
#define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \

View File

@ -10,6 +10,7 @@
#include <arpa/inet.h>
#endif
#include <thread>
#include <common/assert.h>
#include "common/logging/log.h"
#include "core/libraries/error_codes.h"
@ -59,6 +60,7 @@ int PS4_SYSV_ABI sce_net_in6addr_nodelocal_allnodes() {
}
// Stub for sceNetAccept: no connection is accepted and none of the arguments
// (s, addr, paddrlen) are read or written. Always returns ORBIS_OK.
OrbisNetId PS4_SYSV_ABI sceNetAccept(OrbisNetId s, OrbisNetSockaddr* addr, u32* paddrlen) {
// Blocks the calling thread for 60 seconds before returning.
// NOTE(review): presumably this keeps callers from spinning on accept() — confirm intent.
std::this_thread::sleep_for(std::chrono::seconds(60));
LOG_ERROR(Lib_Net, "(STUBBED) called");
return ORBIS_OK;
}
@ -559,7 +561,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
}
int PS4_SYSV_ABI sceNetEpollWait() {
LOG_ERROR(Lib_Net, "(STUBBED) called");
//LOG_ERROR(Lib_Net, "(STUBBED) called");
return ORBIS_OK;
}

View File

@ -341,6 +341,7 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
switch (mount_mode) {
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF: {
if (!std::filesystem::exists(mount_dir)) {
return ORBIS_SAVE_DATA_ERROR_NOT_FOUND;
@ -349,11 +350,13 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
mnt->Mount(mount_dir, g_mount_point);
mount_result->mount_status = 0;
strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
} break;
std::strncpy(mount_result->mount_point.data, g_mount_point.c_str(), 16);
break;
}
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF |

View File

@ -137,8 +137,9 @@ void Emulator::Run(const std::filesystem::path& file) {
}
void Emulator::LoadSystemModules(const std::filesystem::path& file) {
constexpr std::array<SysModules, 6> ModulesToLoad{
constexpr std::array<SysModules, 7> ModulesToLoad{
{{"libSceNgs2.sprx", nullptr},
{"libSceFiber.sprx", nullptr},
{"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal},
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},

View File

@ -176,6 +176,10 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D);
ctx.AddCapability(spv::Capability::Float16);
ctx.AddCapability(spv::Capability::Int16);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
switch (program.info.stage) {

View File

@ -18,8 +18,8 @@ void EmitBitCastU64F64(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
}
void EmitBitCastF16U16(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
Id EmitBitCastF16U16(EmitContext& ctx, Id value) {
return ctx.OpBitcast(ctx.F16[1], value);
}
Id EmitBitCastF32U32(EmitContext& ctx, Id value) {

View File

@ -60,7 +60,11 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
case IR::Attribute::RenderTarget0:
case IR::Attribute::RenderTarget1:
case IR::Attribute::RenderTarget2:
case IR::Attribute::RenderTarget3: {
case IR::Attribute::RenderTarget3:
case IR::Attribute::RenderTarget4:
case IR::Attribute::RenderTarget5:
case IR::Attribute::RenderTarget6:
case IR::Attribute::RenderTarget7: {
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
if (ctx.frag_num_comp[index] > 1) {
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
@ -196,7 +200,15 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
}
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
UNREACHABLE();
const auto info = inst->Flags<IR::BufferInstInfo>();
const auto& buffer = ctx.buffers[handle];
boost::container::static_vector<Id, 2> ids;
for (u32 i = 0; i < 2; i++) {
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
}
return ctx.OpCompositeConstruct(buffer.data_types->Get(2), ids);
}
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {

View File

@ -255,4 +255,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value) {
return ctx.OpConvertUToF(ctx.F64[1], value);
}
// Emits an OpUConvert of `value` with result type ctx.U16 and returns the resulting id.
Id EmitConvertU16U32(EmitContext& ctx, Id value) {
return ctx.OpUConvert(ctx.U16, value);
}
} // namespace Shader::Backend::SPIRV

View File

@ -6,6 +6,11 @@
namespace Shader::Backend::SPIRV {
// Applies the SPIR-V NoContraction decoration to `op` and returns `op` unchanged,
// so the call can wrap an emitted instruction inline.
// `inst` is currently unused by this helper.
Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
ctx.Decorate(op, spv::Decoration::NoContraction);
return op;
}
// Emits an OpFAbs of `value` with result type ctx.F16[1] and returns the resulting id.
Id EmitFPAbs16(EmitContext& ctx, Id value) {
return ctx.OpFAbs(ctx.F16[1], value);
}
@ -19,31 +24,31 @@ Id EmitFPAbs64(EmitContext& ctx, Id value) {
}
Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F16[1], a, b);
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
}
Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F32[1], a, b);
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
}
Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFAdd(ctx.F64[1], a, b);
return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
}
Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFSub(ctx.F32[1], a, b);
return Decorate(ctx, inst, ctx.OpFSub(ctx.F32[1], a, b));
}
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F16[1], a, b, c);
return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
}
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F32[1], a, b, c);
return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
}
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return ctx.OpFma(ctx.F64[1], a, b, c);
return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
}
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
@ -63,15 +68,15 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
}
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F16[1], a, b);
return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
}
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F32[1], a, b);
return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
}
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
return ctx.OpFMul(ctx.F64[1], a, b);
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
}
Id EmitFPNeg16(EmitContext& ctx, Id value) {

View File

@ -141,7 +141,7 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id color_type = texture.data_types->Get(4);
ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), ctx.OpBitcast(color_type, color));
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color));
}
} // namespace Shader::Backend::SPIRV

View File

@ -146,7 +146,7 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
void EmitBitCastU16F16(EmitContext& ctx);
Id EmitBitCastU32F32(EmitContext& ctx, Id value);
void EmitBitCastU64F64(EmitContext& ctx);
void EmitBitCastF16U16(EmitContext&);
Id EmitBitCastF16U16(EmitContext& ctx, Id value);
Id EmitBitCastF32U32(EmitContext& ctx, Id value);
void EmitBitCastF64U64(EmitContext& ctx);
Id EmitPackUint2x32(EmitContext& ctx, Id value);
@ -343,6 +343,7 @@ Id EmitConvertF64U8(EmitContext& ctx, Id value);
Id EmitConvertF64U16(EmitContext& ctx, Id value);
Id EmitConvertF64U32(EmitContext& ctx, Id value);
Id EmitConvertF64U64(EmitContext& ctx, Id value);
Id EmitConvertU16U32(EmitContext& ctx, Id value);
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
Id offset);

View File

@ -74,19 +74,19 @@ Id EmitContext::Def(const IR::Value& value) {
void EmitContext::DefineArithmeticTypes() {
void_id = Name(TypeVoid(), "void_id");
U1[1] = Name(TypeBool(), "bool_id");
// F16[1] = Name(TypeFloat(16), "f16_id");
F16[1] = Name(TypeFloat(16), "f16_id");
F32[1] = Name(TypeFloat(32), "f32_id");
// F64[1] = Name(TypeFloat(64), "f64_id");
S32[1] = Name(TypeSInt(32), "i32_id");
U32[1] = Name(TypeUInt(32), "u32_id");
// U8 = Name(TypeSInt(8), "u8");
// S8 = Name(TypeUInt(8), "s8");
// U16 = Name(TypeUInt(16), "u16_id");
U16 = Name(TypeUInt(16), "u16_id");
// S16 = Name(TypeSInt(16), "s16_id");
// U64 = Name(TypeUInt(64), "u64_id");
for (u32 i = 2; i <= 4; i++) {
// F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
F16[i] = Name(TypeVector(F16[1], i), fmt::format("f16vec{}_id", i));
F32[i] = Name(TypeVector(F32[1], i), fmt::format("f32vec{}_id", i));
// F64[i] = Name(TypeVector(F64[1], i), fmt::format("f64vec{}_id", i));
S32[i] = Name(TypeVector(S32[1], i), fmt::format("i32vec{}_id", i));

View File

@ -32,7 +32,7 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions.
**/
std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
@ -47,6 +47,8 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice);
*out_size += inst.length;
if (inst.opcode == Opcode::S_SETPC_B64) {
break;
}

View File

@ -17,6 +17,6 @@ struct VertexAttribute {
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
};
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
std::vector<VertexAttribute> ParseFetchShader(const u32* code, u32* out_size);
} // namespace Shader::Gcn

View File

@ -5,20 +5,29 @@
namespace Shader::Gcn {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
const u32 dword_offset = [&] -> u32 {
if (smrd.imm) {
return smrd.offset;
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
}
UNREACHABLE();
}();
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
}
}
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset = [&] -> IR::U32 {

View File

@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
@ -190,7 +193,20 @@ void Translator::EmitFetch(const GcnInst& inst) {
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
u32 fetch_size{};
const auto attribs = ParseFetchShader(code, &fetch_size);
if (Config::dumpShaders()) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size);
}
for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
@ -242,6 +258,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
}
Translator translator{block, info};
for (const auto& inst : inst_list) {
block_base += inst.length;
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
translator.S_MOVK(inst);
@ -378,6 +395,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_ATOMIC_ADD:
@ -527,6 +545,12 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XY:
translator.BUFFER_LOAD_FORMAT(2, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, true, inst);
break;
@ -901,7 +925,6 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
magic_enum::enum_name(inst.opcode), opcode);
info.translation_failed = true;
}
block_base += inst.length;
}
}

View File

@ -28,7 +28,8 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
void Translator::V_CVT_F32_F16(const GcnInst& inst) {
const IR::U32 src0 = GetSrc(inst.src[0]);
SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0));
const IR::U16 src0l = ir.UConvert(16, src0);
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
}
void Translator::V_MUL_F32(const GcnInst& inst) {
@ -519,7 +520,7 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])};
ir.SetVcc(ir.Imm1(false));
ir.SetVcc(ir.FPIsInf(src0));
// TODO
}

View File

@ -1194,6 +1194,13 @@ F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_s
}
/// Emits an unsigned integer width conversion of `value` to `result_bitsize` bits.
///
/// Only the U32 -> 16-bit case is handled (emitted as Opcode::ConvertU16U32);
/// every other combination throws NotImplementedException.
U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
    const bool narrows_u32_to_u16 = result_bitsize == 16 && value.Type() == Type::U32;
    if (narrows_u32_to_u16) {
        return Inst<U16>(Opcode::ConvertU16U32, value);
    }
    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}

View File

@ -283,6 +283,7 @@ OPCODE(ConvertF32U32, F32, U32,
OPCODE(ConvertF64S32, F64, U32, )
OPCODE(ConvertF64U32, F64, U32, )
OPCODE(ConvertF32U16, F32, U16, )
OPCODE(ConvertU16U32, U16, U32, )
// Image operations
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )

View File

@ -234,7 +234,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, Am
**/
IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* p0 = handle->Arg(0).InstRecursive();
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate()) {
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() || !p0->Arg(1).IsImmediate()) {
return -1;
}
IR::Inst* p1 = handle->Arg(1).InstRecursive();
@ -286,7 +286,9 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
if (inst_info.is_typed) {
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
(inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32));
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32));
}
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {

View File

@ -165,6 +165,7 @@ struct Info {
Stage stage;
uintptr_t pgm_base{};
u64 pgm_hash{};
u32 shared_memory_size{};
bool uses_group_quad{};
bool uses_shared_u8{};

View File

@ -479,7 +479,7 @@ struct Liverpool {
template <typename T = VAddr>
T Address() const {
return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
}
};

View File

@ -329,6 +329,10 @@ struct PM4CmdEventWriteEop {
*Address<u64>() = Common::FencedRDTSC();
break;
}
case DataSelect::GpuClock64: {
*Address<u64>() = 0;
break;
}
default: {
UNREACHABLE();
}
@ -549,8 +553,8 @@ struct PM4DumpConstRam {
u32 addr_hi;
template <typename T>
T* Address() const {
return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo);
T Address() const {
return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
}
[[nodiscard]] u32 Offset() const {

View File

@ -63,7 +63,7 @@ struct Buffer {
if (stride == 0) {
return 1U;
}
ASSERT(stride % element_size == 0);
//ASSERT(stride % element_size == 0);
return stride / element_size;
}

View File

@ -404,6 +404,26 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR4G4B4A4UnormPack16;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR16G16B16A16Uint;
}
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32A32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR8Sint;
}
if (data_format == AmdGpu::DataFormat::FormatBc1 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc1RgbaSrgbBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16Sint;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View File

@ -209,12 +209,14 @@ bool Instance::CreateDevice() {
.shaderImageGatherExtended = true,
.shaderStorageImageMultisample = true,
.shaderClipDistance = features.shaderClipDistance,
.shaderInt16 = true,
},
},
vk::PhysicalDeviceVulkan11Features{
.shaderDrawParameters = true,
},
vk::PhysicalDeviceVulkan12Features{
.shaderFloat16 = true,
.scalarBlockLayout = true,
.uniformBufferStandardLayout = true,
.hostQueryReset = true,

View File

@ -251,11 +251,16 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
if (hash == 0x43ade46898f820e2 || hash == 0xbcf2be6c546ad35a) {
return nullptr;
}
// Recompile shader to IR.
try {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
info.pgm_base = pgm->Address<uintptr_t>();
info.pgm_hash = hash;
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
// Compile IR to SPIR-V

View File

@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 3_GB, BufferType::Upload} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
@ -174,11 +174,9 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed, u32 index_offset) {
// Upload index data to stream buffer.
const auto index_address = regs.index_base_address.Address<const void*>();
const u32 index_buffer_size = regs.num_indices * index_size;
const u32 index_buffer_size = (index_offset + regs.num_indices) * index_size;
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
static constexpr std::array<u16, 4> test{};
std::memcpy(data, index_address, index_buffer_size);
ASSERT(std::memcmp(data, test.data(), sizeof(test)) != 0);
vertex_index_buffer.Commit(index_buffer_size);
// Bind index buffer.

View File

@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
//scheduler.Wait(watch.tick);
++wait_cursor;
}
}

View File

@ -77,8 +77,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
if (usage_override) {
usage_ci.usage = usage_override.value();
}
if (info.format == vk::Format::eR32Sfloat) {
printf("stop\n");
if (image.info.type == vk::ImageType::e1D) {
printf("bad\n");
}
// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
vk::Format format = info.format;

View File

@ -185,6 +185,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eB8G8R8A8Srgb:
case vk::Format::eB8G8R8A8Unorm:
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eR32Sfloat:
case vk::Format::eR32Uint:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock: