Fixup a few things

IndecisiveTurtle 2024-07-04 23:17:04 +03:00
parent 58dcd6473d
commit 7e7fa4c701
40 changed files with 160 additions and 188 deletions

View File

@@ -14,6 +14,12 @@
namespace Common {
std::string ToLower(std::string str) {
std::transform(str.begin(), str.end(), str.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
return str;
}
std::vector<std::string> SplitString(const std::string& str, char delimiter) {
std::istringstream iss(str);
std::vector<std::string> output(1);

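The SplitString hunk is truncated above; for reference, a minimal sketch of how a getline-based split along these lines is typically completed (an illustration consistent with the istringstream and output(1) setup shown, not necessarily the exact committed body; relies on <sstream>, <string>, <vector>):

std::vector<std::string> SplitString(const std::string& str, char delimiter) {
    std::istringstream iss(str);
    std::vector<std::string> output(1); // start with one empty element to stream into
    // Read up to each delimiter into the last element, then open a new one.
    while (std::getline(iss, *output.rbegin(), delimiter)) {
        output.emplace_back();
    }
    output.pop_back(); // drop the element left over once the final getline fails
    return output;
}
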
View File

@@ -9,6 +9,9 @@
namespace Common {
/// Make a string lowercase
[[nodiscard]] std::string ToLower(std::string str);
std::vector<std::string> SplitString(const std::string& str, char delimiter);
#ifdef _WIN32

View File

@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include "common/string_util.h"
#include "core/file_sys/fs.h"
namespace Core::FileSys {
@@ -41,12 +42,6 @@ std::string MntPoints::GetHostDirectory(const std::string& guest_directory) {
return "";
}
std::string ToLower(std::string str) {
std::transform(str.begin(), str.end(), str.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
return str;
}
std::string MntPoints::GetHostFile(const std::string& guest_file) {
std::scoped_lock lock{m_mutex};
@@ -61,9 +56,9 @@ std::string MntPoints::GetHostFile(const std::string& guest_file) {
#ifndef _WIN64
const std::filesystem::path path{host_path};
if (!std::filesystem::exists(path)) {
const auto filename = ToLower(path.filename());
const auto filename = Common::ToLower(path.filename());
for (const auto& file : std::filesystem::directory_iterator(path.parent_path())) {
const auto exist_filename = ToLower(file.path().filename());
const auto exist_filename = Common::ToLower(file.path().filename());
if (filename == exist_filename) {
return file.path();
}

View File

@@ -344,8 +344,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
}
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed() {
LOG_TRACE(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
return liverpool->IsGpuIdle();
LOG_TRACE(Lib_GnmDriver, "called");
return submission_lock == 0;
}
int PS4_SYSV_ABI sceGnmBeginWorkload() {
@@ -912,9 +912,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
return ORBIS_OK;
}
u64 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return 0x800000000;
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
@@ -1930,10 +1930,8 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
}
}
LOG_INFO(Lib_GnmDriver, "called submission_lock = {}", submission_lock);
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
LOG_INFO(Lib_GnmDriver, "Done waiting for GPU");
// Suspend logic goes here

View File

@@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp();
int PS4_SYSV_ABI sceGnmGetEqEventType();
int PS4_SYSV_ABI sceGnmGetEqTimeStamp();
int PS4_SYSV_ABI sceGnmGetGpuBlockStatus();
u64 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus();
int PS4_SYSV_ABI sceGnmGetLastWaitedAddress();
int PS4_SYSV_ABI sceGnmGetNumTcaUnits();

View File

@@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
file->m_host_name = mnt->GetHostFile(file->m_guest_name);
if (read) {
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read);
} else if (write && create) {
} else if (write && create && truncate) {
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write);
} else if (write && create && append) { // CUSA04729 (appends app0/shaderlist.txt)
file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append);
@@ -90,7 +90,7 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) {
}
if (!file->f.IsOpen()) {
h->DeleteHandle(handle);
return SCE_KERNEL_ERROR_ENOENT;
return SCE_KERNEL_ERROR_EACCES;
}
}
file->is_opened = true;

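For orientation, the read/write/create/truncate/append booleans tested in this hunk are decoded from the POSIX-style flag word passed to sceKernelOpen; a hedged sketch of such a decode using standard <fcntl.h> names (the emulator's own constants, names, and exact access-mode handling may differ):

#include <fcntl.h>

struct OpenFlags {
    bool read, write, rdwr, create, truncate, append;
};

// Hypothetical helper: splits the flag word into the booleans checked above.
inline OpenFlags DecodeOpenFlags(int flags) {
    const int access = flags & O_ACCMODE; // low bits select the access mode
    return {
        .read = access == O_RDONLY,
        .write = access == O_WRONLY,
        .rdwr = access == O_RDWR,
        .create = (flags & O_CREAT) != 0,
        .truncate = (flags & O_TRUNC) != 0,
        .append = (flags & O_APPEND) != 0,
    };
}
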
View File

@@ -110,13 +110,10 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i
}
}
VAddr in_addr = reinterpret_cast<VAddr>(*addr);
const VAddr in_addr = reinterpret_cast<VAddr>(*addr);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
auto* memory = Core::Memory::Instance();
if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
//in_addr = 0x880000000;
}
return memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "",
false, directMemoryStart, alignment);
}
@@ -146,13 +143,10 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
return ORBIS_KERNEL_ERROR_EFAULT;
}
VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
auto* memory = Core::Memory::Instance();
if (False(map_flags & Core::MemoryMapFlags::Fixed) && in_addr == 0) {
//in_addr = 0x880000000;
}
const int ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
Core::VMAType::Flexible, name);

View File

@@ -4,8 +4,8 @@
#include <mutex>
#include <thread>
#include <semaphore.h>
#include "common/assert.h"
#include "common/alignment.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/logging/log.h"
#include "common/singleton.h"
@@ -67,7 +67,6 @@ int PS4_SYSV_ABI scePthreadAttrInit(ScePthreadAttr* attr) {
SceKernelSchedParam param{};
param.sched_priority = 700;
result = pthread_attr_setstacksize(&(*attr)->pth_attr, 2_MB);
result = (result == 0 ? scePthreadAttrSetinheritsched(attr, 4) : result);
result = (result == 0 ? scePthreadAttrSetschedparam(attr, &param) : result);
result = (result == 0 ? scePthreadAttrSetschedpolicy(attr, SCHED_OTHER) : result);
@@ -926,10 +925,6 @@ int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr
attr = g_pthread_cxt->GetDefaultAttr();
}
if (name != nullptr && std::string_view(name) == "RenderMixThread") {
printf("bad\n");
}
*thread = pthread_pool->Create();
if ((*thread)->attr != nullptr) {
@@ -986,8 +981,9 @@ ScePthread PThreadPool::Create() {
auto* ret = new PthreadInternal{};
#else
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
auto* ret = reinterpret_cast<PthreadInternal*>(mmap(hint_address, sizeof(PthreadInternal),
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
auto* ret = reinterpret_cast<PthreadInternal*>(
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
#endif

View File

@@ -42,7 +42,7 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> {
template <StringLiteral name, class F, F f>
constexpr auto wrapper = wrapper_impl<name, F, f>::wrap;
//#define W(foo) wrapper<#foo, decltype(&foo), foo>
// #define W(foo) wrapper<#foo, decltype(&foo), foo>
#define W(foo) foo
#define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \

View File

@@ -10,7 +10,6 @@
#include <arpa/inet.h>
#endif
#include <thread>
#include <common/assert.h>
#include "common/logging/log.h"
#include "core/libraries/error_codes.h"
@@ -60,7 +59,6 @@ int PS4_SYSV_ABI sce_net_in6addr_nodelocal_allnodes() {
}
OrbisNetId PS4_SYSV_ABI sceNetAccept(OrbisNetId s, OrbisNetSockaddr* addr, u32* paddrlen) {
std::this_thread::sleep_for(std::chrono::seconds(60));
LOG_ERROR(Lib_Net, "(STUBBED) called");
return ORBIS_OK;
}
@@ -561,7 +559,7 @@ int PS4_SYSV_ABI sceNetEpollDestroy() {
}
int PS4_SYSV_ABI sceNetEpollWait() {
//LOG_ERROR(Lib_Net, "(STUBBED) called");
LOG_ERROR(Lib_Net, "(STUBBED) called");
return ORBIS_OK;
}

View File

@@ -356,7 +356,8 @@ s32 saveDataMount(u32 user_id, std::string dir_name, u32 mount_mode,
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDONLY:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_RDWR |
ORBIS_SAVE_DATA_MOUNT_MODE_COPY_ICON:
case ORBIS_SAVE_DATA_MOUNT_MODE_CREATE | ORBIS_SAVE_DATA_MOUNT_MODE_DESTRUCT_OFF |

View File

@@ -61,12 +61,7 @@ int PS4_SYSV_ABI sceMsgDialogUpdateStatus() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceImeDialogGetStatus() {
return 1;
}
void RegisterlibSceMsgDialog(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("IADmD4tScBY", "libSceImeDialog", 1, "libSceImeDialog", 1, 1, sceImeDialogGetStatus);
LIB_FUNCTION("HTrcDKlFKuM", "libSceMsgDialog", 1, "libSceMsgDialog", 1, 1, sceMsgDialogClose);
LIB_FUNCTION("Lr8ovHH9l6A", "libSceMsgDialog", 1, "libSceMsgDialog", 1, 1,
sceMsgDialogGetResult);

View File

@@ -137,9 +137,8 @@ void Emulator::Run(const std::filesystem::path& file) {
}
void Emulator::LoadSystemModules(const std::filesystem::path& file) {
constexpr std::array<SysModules, 7> ModulesToLoad{
constexpr std::array<SysModules, 6> ModulesToLoad{
{{"libSceNgs2.sprx", nullptr},
{"libSceFiber.sprx", nullptr},
{"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal},
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},

View File

@@ -81,17 +81,13 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id of
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id result_type = texture.data_types->Get(4);
if (Sirit::ValidId(lod)) {
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, spv::ImageOperandsMask::Lod, lod));
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords,
spv::ImageOperandsMask::Lod, lod));
} else {
return ctx.OpBitcast(ctx.F32[4], ctx.OpImageFetch(result_type, image, coords));
}
}
Id EmitImageFetchU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms) {
return Id{};
}
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);

View File

@@ -359,8 +359,6 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms);
Id EmitImageFetchU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,

View File

@@ -231,10 +231,10 @@ void EmitContext::DefineOutputs(const Info& info) {
f32_zero_value, f32_zero_value};
const Id type{TypeArray(F32[1], ConstU32(8U))};
const Id initializer{ConstantComposite(type, zero)};
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance,
spv::StorageClass::Output, initializer);
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance,
spv::StorageClass::Output, initializer);
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
initializer);
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output,
initializer);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {

View File

@@ -74,7 +74,8 @@ public:
}
[[nodiscard]] Id DefineVariable(Id type, std::optional<spv::BuiltIn> builtin,
spv::StorageClass storage_class, std::optional<Id> initializer = std::nullopt) {
spv::StorageClass storage_class,
std::optional<Id> initializer = std::nullopt) {
const Id id{DefineVar(type, storage_class, initializer)};
if (builtin) {
Decorate(id, spv::Decoration::BuiltIn, *builtin);

View File

@@ -633,7 +633,8 @@ private:
if (!stmt.block->is_dummy) {
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), info);
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
info);
}
break;
}

View File

@@ -62,8 +62,7 @@ void Translator::S_BARRIER() {
}
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
SetDst(inst.dst[0], src0);
UNREACHABLE();
}
} // namespace Shader::Gcn

View File

@@ -39,7 +39,8 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
}
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
const IR::Value vsharp = ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
const IR::Value vsharp =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {

View File

@@ -240,9 +240,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
attrib.instance_data);
}
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.fmt = buffer.GetNumberFmt(),
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,

View File

@@ -518,10 +518,7 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
}
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])};
ir.SetVcc(ir.FPIsInf(src0));
// TODO
UNREACHABLE();
}
} // namespace Shader::Gcn

View File

@@ -250,7 +250,8 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
}
const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code};
@@ -311,7 +312,8 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
break;
}
const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBuffer(num_dwords, handle, address, value, info);
}

View File

@@ -272,8 +272,9 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
return Inst<U64>(Opcode::LoadSharedU64, offset);
case 128:
return Inst(Opcode::LoadSharedU128, offset);
}
default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}
}
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
@@ -294,7 +295,7 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
Inst(Opcode::WriteSharedU128, offset, value);
break;
default:
throw InvalidArgument("Invalid bit size {}", bit_size);
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}
}

View File

@@ -293,7 +293,6 @@ OPCODE(ImageSampleDrefExplicitLod, F32, Opaq
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageFetchU32, U32x4, Opaque, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )

View File

@@ -82,7 +82,6 @@ bool IsImageInstruction(const IR::Inst& inst) {
case IR::Opcode::ImageSampleDrefExplicitLod:
case IR::Opcode::ImageSampleDrefImplicitLod:
case IR::Opcode::ImageFetch:
case IR::Opcode::ImageFetchU32:
case IR::Opcode::ImageGather:
case IR::Opcode::ImageGatherDref:
case IR::Opcode::ImageQueryDimensions:
@@ -222,19 +221,23 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
static constexpr size_t MaxUboSize = 65536;
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, AmdGpu::Buffer& cbuf) {
/**
* Assert for the following pattern
* s_getpc_b64 s[32:33]
* s_add_u32 s32, <const>, s32
* s_addc_u32 s33, 0, s33
* s_mov_b32 s35, <const>
* s_movk_i32 s34, <const>
* buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
**/
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
AmdGpu::Buffer& cbuf) {
// Assuming V# is in UD s[32:35]
// The next pattern:
// s_getpc_b64 s[32:33]
// s_add_u32 s32, <const>, s32
// s_addc_u32 s33, 0, s33
// s_mov_b32 s35, <const>
// s_movk_i32 s34, <const>
// buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
// is used to define an inline constant buffer
IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* p0 = handle->Arg(0).InstRecursive();
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() || !p0->Arg(1).IsImmediate()) {
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
!p0->Arg(1).IsImmediate()) {
return -1;
}
IR::Inst* p1 = handle->Arg(1).InstRecursive();
@@ -244,10 +247,12 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, Am
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
return -1;
}
// We have found this pattern. Build the sharp and assign a binding to it.
cbuf.raw0 = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
cbuf.num_records = handle->Arg(2).U32();
cbuf.raw11 = handle->Arg(3).U32();
// We have found this pattern. Build the sharp.
std::array<u64, 2> buffer;
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32;
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
// Assign a binding to this sharp.
return descriptors.Add(BufferResource{
.sgpr_base = std::numeric_limits<u32>::max(),
.dword_offset = 0,

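The two raw words assembled above follow the V# (buffer resource descriptor) layout that the reworked AmdGpu::Buffer struct later in this commit spells out as plain bit-fields; a reduced stand-alone sketch of the same packing (the struct here is an illustrative stand-in, not the real definition):

#include <array>
#include <bit>
#include <cstdint>

// Stand-in for AmdGpu::Buffer, keeping only the fields the inline-cbuf path fills.
struct InlineVsharp {
    uint64_t base_address : 44; // words 0/1: virtual address of the constant buffer
    uint64_t : 20;              // stride/swizzle bits, left zero for an inline cbuf
    uint32_t num_records;       // word 2: size of the buffer
    uint32_t dword3;            // word 3: dst_sel/num_format/data_format bits
};
static_assert(sizeof(InlineVsharp) == 16);

inline InlineVsharp PackInlineVsharp(uint64_t base, uint32_t num_records, uint32_t dword3) {
    std::array<uint64_t, 2> raw{};
    raw[0] = base;                                   // address resolved from s_getpc + s_add
    raw[1] = num_records | (uint64_t(dword3) << 32); // immediates from s_movk / s_mov
    return std::bit_cast<InlineVsharp>(raw);
}
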
View File

@@ -329,10 +329,6 @@ struct PM4CmdEventWriteEop {
*Address<u64>() = Common::FencedRDTSC();
break;
}
case DataSelect::GpuClock64: {
*Address<u64>() = 0;
break;
}
default: {
UNREACHABLE();
}

View File

@@ -21,27 +21,21 @@ enum class CompSwizzle : u32 {
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
struct Buffer {
union {
u64 raw0;
BitField<0, 44, u64> base_address;
BitField<48, 14, u64> stride;
BitField<62, 1, u64> cache_swizzle;
BitField<63, 1, u64> swizzle_enable;
};
u64 base_address : 44;
u64 : 4;
u64 stride : 14;
u64 cache_swizzle : 1;
u64 swizzle_enable : 1;
u32 num_records;
union {
u32 raw11;
BitField<0, 3, u32> dst_sel_x;
BitField<3, 3, u32> dst_sel_y;
BitField<6, 3, u32> dst_sel_z;
BitField<9, 3, u32> dst_sel_w;
BitField<0, 12, u32> dst_sel;
BitField<12, 3, NumberFormat> num_format;
BitField<15, 4, DataFormat> data_format;
BitField<19, 2, u32> element_size;
BitField<21, 2, u32> index_stride;
BitField<23, 1, u32> add_tid_enable;
};
u32 dst_sel_x : 3;
u32 dst_sel_y : 3;
u32 dst_sel_z : 3;
u32 dst_sel_w : 3;
u32 num_format : 3;
u32 data_format : 4;
u32 element_size : 2;
u32 index_stride : 2;
u32 add_tid_enable : 1;
operator bool() const noexcept {
return base_address != 0;
@@ -52,18 +46,27 @@ struct Buffer {
}
CompSwizzle GetSwizzle(u32 comp) const noexcept {
return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
return static_cast<CompSwizzle>(select[comp]);
}
NumberFormat GetNumberFmt() const noexcept {
return static_cast<NumberFormat>(num_format);
}
DataFormat GetDataFmt() const noexcept {
return static_cast<DataFormat>(data_format);
}
u32 GetStride() const noexcept {
return stride == 0 ? 1U : stride.Value();
return stride == 0 ? 1U : stride;
}
u32 GetStrideElements(u32 element_size) const noexcept {
if (stride == 0) {
return 1U;
}
//ASSERT(stride % element_size == 0);
ASSERT(stride % element_size == 0);
return stride / element_size;
}
@@ -71,6 +74,7 @@ struct Buffer {
return GetStride() * num_records;
}
};
static_assert(sizeof(Buffer) == 16); // 128bits
enum class ImageType : u64 {
Buffer = 0,

View File

@@ -412,12 +412,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32A32Uint;
}
if (data_format == AmdGpu::DataFormat::Format8 &&
num_format == AmdGpu::NumberFormat::Sint) {
if (data_format == AmdGpu::DataFormat::Format8 && num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR8Sint;
}
if (data_format == AmdGpu::DataFormat::FormatBc1 &&
num_format == AmdGpu::NumberFormat::Srgb) {
if (data_format == AmdGpu::DataFormat::FormatBc1 && num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eBc1RgbaSrgbBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16 &&

View File

@@ -14,8 +14,8 @@ namespace Vulkan {
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache, const Shader::Info* info_,
vk::ShaderModule module)
: instance{instance_}, scheduler{scheduler_}, info{*info_} {
u64 compute_key_, vk::ShaderModule module)
: instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{*info_} {
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
@@ -93,7 +93,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
for (const auto& buffer : info.buffers) {
const auto vsharp = buffer.GetVsharp(info);
const u32 size = vsharp.GetSize();
const VAddr address = vsharp.base_address.Value();
const VAddr address = vsharp.base_address;
texture_cache.OnCpuWrite(address);
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()

View File

@@ -24,7 +24,7 @@ class ComputePipeline {
public:
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, const Shader::Info* info,
vk::ShaderModule module);
u64 compute_key, vk::ShaderModule module);
~ComputePipeline();
[[nodiscard]] vk::Pipeline Handle() const noexcept {

View File

@@ -47,7 +47,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0,
});
bindings.push_back({
@@ -327,7 +327,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
const auto vsharp = buffer.GetVsharp(stage);
const VAddr address = vsharp.base_address.Value();
const VAddr address = vsharp.base_address;
const u32 size = vsharp.GetSize();
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
@@ -419,8 +419,7 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
continue;
}
guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address.Value(),
buffer.base_address.Value() + buffer.GetSize());
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
}
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
return lhv.base_address < rhv.base_address;

View File

@@ -74,12 +74,12 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
available_extensions = GetSupportedExtensions(physical_device);
properties = physical_device.getProperties();
CollectDeviceParameters();
ASSERT_MSG(properties.apiVersion >= TargetVulkanApiVersion,
"Vulkan {}.{} is required, but only {}.{} is supported by device!",
VK_VERSION_MAJOR(TargetVulkanApiVersion), VK_VERSION_MINOR(TargetVulkanApiVersion),
VK_VERSION_MAJOR(properties.apiVersion), VK_VERSION_MINOR(properties.apiVersion));
CollectDeviceParameters();
CreateDevice();
CollectToolingInfo();
}
@@ -245,8 +245,7 @@ bool Instance::CreateDevice() {
.workgroupMemoryExplicitLayoutScalarBlockLayout = true,
.workgroupMemoryExplicitLayout8BitAccess = true,
.workgroupMemoryExplicitLayout16BitAccess = true,
}
};
}};
if (!color_write_en) {
device_chain.unlink<vk::PhysicalDeviceColorWriteEnableFeaturesEXT>();

View File

@@ -22,42 +22,46 @@ using Shader::VsOutput;
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
if (x != VsOutput::None || y != VsOutput::None ||
z != VsOutput::None || w != VsOutput::None) {
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
w != VsOutput::None) {
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
}
};
// VS_OUT_MISC_VEC
add_output(
ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
ctl.use_vtx_edge_flag ? VsOutput::EdgeFlag :
(ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
ctl.use_vtx_kill_flag ? VsOutput::KillFlag :
(ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None
);
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
ctl.use_vtx_edge_flag
? VsOutput::EdgeFlag
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
ctl.use_vtx_kill_flag
? VsOutput::KillFlag
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
// VS_OUT_CCDIST0
add_output(
ctl.IsClipDistEnabled(0) ? VsOutput::ClipDist0 :
(ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
ctl.IsClipDistEnabled(1) ? VsOutput::ClipDist1 :
(ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
ctl.IsClipDistEnabled(2) ? VsOutput::ClipDist2 :
(ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
ctl.IsClipDistEnabled(3) ? VsOutput::ClipDist3 :
(ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)
);
add_output(ctl.IsClipDistEnabled(0)
? VsOutput::ClipDist0
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
ctl.IsClipDistEnabled(1)
? VsOutput::ClipDist1
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
ctl.IsClipDistEnabled(2)
? VsOutput::ClipDist2
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
ctl.IsClipDistEnabled(3)
? VsOutput::ClipDist3
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
// VS_OUT_CCDIST1
add_output(
ctl.IsClipDistEnabled(4) ? VsOutput::ClipDist4 :
(ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
ctl.IsClipDistEnabled(5) ? VsOutput::ClipDist5 :
(ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
ctl.IsClipDistEnabled(6) ? VsOutput::ClipDist6 :
(ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
ctl.IsClipDistEnabled(7) ? VsOutput::ClipDist7 :
(ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)
);
add_output(ctl.IsClipDistEnabled(4)
? VsOutput::ClipDist4
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
ctl.IsClipDistEnabled(5)
? VsOutput::ClipDist5
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
ctl.IsClipDistEnabled(6)
? VsOutput::ClipDist6
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
ctl.IsClipDistEnabled(7)
? VsOutput::ClipDist7
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
@@ -214,13 +218,13 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
// actual draw hence can skip pipeline creation.
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
LOG_TRACE(Render_Vulkan, "FCE pass skipped");
//return {};
return {};
}
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
// TODO: check for a valid MRT1 to promote the draw to the resolve pass.
LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
//return {};
return {};
}
u32 binding{};
@@ -251,10 +255,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
if (hash == 0x43ade46898f820e2 || hash == 0xbcf2be6c546ad35a) {
return nullptr;
}
// Recompile shader to IR.
try {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
@@ -314,7 +314,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
const auto name = fmt::format("cs_{:#x}", compute_key);
Vulkan::SetObjectName(instance.GetDevice(), module, name);
return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
&program.info, module);
&program.info, compute_key, module);
} catch (const Shader::Exception& e) {
UNREACHABLE_MSG("{}", e.what());
return nullptr;

View File

@@ -75,7 +75,8 @@ void Rasterizer::DispatchDirect() {
}
try {
const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
const auto has_resources =
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
if (!has_resources) {
return;
}
@@ -137,10 +138,6 @@ void Rasterizer::BeginRendering() {
};
texture_cache.TouchMeta(htile_address, false);
state.num_depth_attachments++;
} else {
if (regs.depth_render_control.depth_compress_disable) {
LOG_WARNING(Render_Vulkan, "No depth buffer bound with dcc");
}
}
scheduler.BeginRendering(state);
}

View File

@@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
//scheduler.Wait(watch.tick);
scheduler.Wait(watch.tick);
++wait_cursor;
}
}

View File

@@ -221,9 +221,6 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
if (cpu_addr == 2990538752ULL) {
printf("bad\n");
}
ASSERT(info.pixel_format != vk::Format::eUndefined);
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
vk::ImageCreateFlagBits::eExtendedUsage};

View File

@@ -77,9 +77,6 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
if (usage_override) {
usage_ci.usage = usage_override.value();
}
if (image.info.type == vk::ImageType::e1D) {
printf("bad\n");
}
// When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
vk::Format format = info.format;
vk::ImageAspectFlags aspect = image.aspect_mask;

View File

@@ -127,14 +127,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
image_ids.push_back(image_id);
});
//ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
ImageId image_id{};
if (image_ids.empty()) {
image_id = slot_images.insert(instance, scheduler, info, cpu_address);
RegisterImage(image_id);
} else {
image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
image_id = image_ids[0];
}
RegisterMeta(info, image_id);