Merge pull request #148 from shadps4-emu/images
video_core: Add linear image support
This commit is contained in:
commit
5f67b59180
|
@ -401,6 +401,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
||||||
src/video_core/texture_cache/image.h
|
src/video_core/texture_cache/image.h
|
||||||
src/video_core/texture_cache/image_view.cpp
|
src/video_core/texture_cache/image_view.cpp
|
||||||
src/video_core/texture_cache/image_view.h
|
src/video_core/texture_cache/image_view.h
|
||||||
|
src/video_core/texture_cache/sampler.cpp
|
||||||
|
src/video_core/texture_cache/sampler.h
|
||||||
src/video_core/texture_cache/slot_vector.h
|
src/video_core/texture_cache/slot_vector.h
|
||||||
src/video_core/texture_cache/texture_cache.cpp
|
src/video_core/texture_cache/texture_cache.cpp
|
||||||
src/video_core/texture_cache/texture_cache.h
|
src/video_core/texture_cache/texture_cache.h
|
||||||
|
|
|
@ -9,16 +9,17 @@
|
||||||
|
|
||||||
namespace Config {
|
namespace Config {
|
||||||
|
|
||||||
bool isNeo = false;
|
static bool isNeo = false;
|
||||||
u32 screenWidth = 1280;
|
static u32 screenWidth = 1280;
|
||||||
u32 screenHeight = 720;
|
static u32 screenHeight = 720;
|
||||||
s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select
|
static s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select
|
||||||
std::string logFilter;
|
static std::string logFilter;
|
||||||
std::string logType = "sync";
|
static std::string logType = "sync";
|
||||||
bool isDebugDump = false;
|
static bool isDebugDump = false;
|
||||||
bool isLibc = true;
|
static bool isLibc = true;
|
||||||
bool isShowSplash = false;
|
static bool isShowSplash = false;
|
||||||
bool isNullGpu = false;
|
static bool isNullGpu = false;
|
||||||
|
static bool shouldDumpShaders = false;
|
||||||
|
|
||||||
bool isLleLibc() {
|
bool isLleLibc() {
|
||||||
return isLibc;
|
return isLibc;
|
||||||
|
@ -59,6 +60,10 @@ bool nullGpu() {
|
||||||
return isNullGpu;
|
return isNullGpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool dumpShaders() {
|
||||||
|
return shouldDumpShaders;
|
||||||
|
}
|
||||||
|
|
||||||
void load(const std::filesystem::path& path) {
|
void load(const std::filesystem::path& path) {
|
||||||
// If the configuration file does not exist, create it and return
|
// If the configuration file does not exist, create it and return
|
||||||
std::error_code error;
|
std::error_code error;
|
||||||
|
@ -96,6 +101,7 @@ void load(const std::filesystem::path& path) {
|
||||||
screenHeight = toml::find_or<toml::integer>(gpu, "screenHeight", screenHeight);
|
screenHeight = toml::find_or<toml::integer>(gpu, "screenHeight", screenHeight);
|
||||||
gpuId = toml::find_or<toml::integer>(gpu, "gpuId", 0);
|
gpuId = toml::find_or<toml::integer>(gpu, "gpuId", 0);
|
||||||
isNullGpu = toml::find_or<toml::boolean>(gpu, "nullGpu", false);
|
isNullGpu = toml::find_or<toml::boolean>(gpu, "nullGpu", false);
|
||||||
|
shouldDumpShaders = toml::find_or<toml::boolean>(gpu, "dumpShaders", false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (data.contains("Debug")) {
|
if (data.contains("Debug")) {
|
||||||
|
@ -142,6 +148,7 @@ void save(const std::filesystem::path& path) {
|
||||||
data["GPU"]["screenWidth"] = screenWidth;
|
data["GPU"]["screenWidth"] = screenWidth;
|
||||||
data["GPU"]["screenHeight"] = screenHeight;
|
data["GPU"]["screenHeight"] = screenHeight;
|
||||||
data["GPU"]["nullGpu"] = isNullGpu;
|
data["GPU"]["nullGpu"] = isNullGpu;
|
||||||
|
data["GPU"]["dumpShaders"] = shouldDumpShaders;
|
||||||
data["Debug"]["DebugDump"] = isDebugDump;
|
data["Debug"]["DebugDump"] = isDebugDump;
|
||||||
data["LLE"]["libc"] = isLibc;
|
data["LLE"]["libc"] = isLibc;
|
||||||
|
|
||||||
|
|
|
@ -22,5 +22,6 @@ bool debugDump();
|
||||||
bool isLleLibc();
|
bool isLleLibc();
|
||||||
bool showSplash();
|
bool showSplash();
|
||||||
bool nullGpu();
|
bool nullGpu();
|
||||||
|
bool dumpShaders();
|
||||||
|
|
||||||
}; // namespace Config
|
}; // namespace Config
|
||||||
|
|
|
@ -881,11 +881,13 @@ int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() {
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier) {
|
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
|
u32 shader_modifier) {
|
||||||
LOG_TRACE(Lib_GnmDriver, "called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
// A fullscreen triangle with one uv set
|
// A fullscreen triangle with one uv set
|
||||||
const static u32 shader_code[] = {
|
// clang-format off
|
||||||
|
constexpr static std::array shader_code alignas(256) = {
|
||||||
0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007
|
0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007
|
||||||
0x36020081u, // v_and_b32 v1, 1, v0
|
0x36020081u, // v_and_b32 v1, 1, v0
|
||||||
0x34020281u, // v_lshlrev_b32 v1, 1, v1
|
0x34020281u, // v_lshlrev_b32 v1, 1, v1
|
||||||
|
@ -893,6 +895,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1
|
0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1
|
||||||
0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0
|
0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0
|
||||||
0x7e020b01u, // v_cvt_f32_i32 v1, v1
|
0x7e020b01u, // v_cvt_f32_i32 v1, v1
|
||||||
|
0x7E000B00U,
|
||||||
0x7e040280u, // v_cvt_f32_i32 v0, v0
|
0x7e040280u, // v_cvt_f32_i32 v0, v0
|
||||||
0x7e0602f2u, // v_mov_b32 v3, 1.0
|
0x7e0602f2u, // v_mov_b32 v3, 1.0
|
||||||
0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done
|
0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done
|
||||||
|
@ -901,9 +904,11 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
|
||||||
|
|
||||||
// OrbShdr header
|
// OrbShdr header
|
||||||
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
|
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
|
||||||
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u};
|
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u,
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
const auto shader_addr = uintptr_t(&shader_code); // Original address is 0xfe000f10
|
const auto shader_addr = uintptr_t(shader_code.data()); // Original address is 0xfe000f10
|
||||||
const static u32 vs_regs[] = {
|
const static u32 vs_regs[] = {
|
||||||
u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7};
|
u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7};
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/debug.h"
|
|
||||||
#include "core/libraries/kernel/event_queue.h"
|
#include "core/libraries/kernel/event_queue.h"
|
||||||
|
|
||||||
namespace Libraries::Kernel {
|
namespace Libraries::Kernel {
|
||||||
|
|
|
@ -35,7 +35,8 @@ static void* PS4_SYSV_ABI sceKernelGetProcParam() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t PS4_SYSV_ABI sceKernelReleaseDirectMemory(off_t start, size_t len) {
|
int32_t PS4_SYSV_ABI sceKernelReleaseDirectMemory(off_t start, size_t len) {
|
||||||
UNREACHABLE();
|
auto* memory = Core::Memory::Instance();
|
||||||
|
memory->Free(start, len);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -199,6 +199,7 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
|
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
|
||||||
|
return;
|
||||||
const vk::Device device = instance->GetDevice();
|
const vk::Device device = instance->GetDevice();
|
||||||
const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
|
const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
|
||||||
void* host_pointer = reinterpret_cast<void*>(addr);
|
void* host_pointer = reinterpret_cast<void*>(addr);
|
||||||
|
@ -270,6 +271,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
|
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
|
||||||
|
return;
|
||||||
const auto it = mapped_memories.find(addr);
|
const auto it = mapped_memories.find(addr);
|
||||||
ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
|
ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
|
||||||
mapped_memories.erase(it);
|
mapped_memories.erase(it);
|
||||||
|
|
|
@ -218,8 +218,8 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& bindings) {
|
std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) {
|
||||||
EmitContext ctx{profile, program, bindings};
|
EmitContext ctx{profile, program, binding};
|
||||||
const Id main{DefineMain(ctx, program)};
|
const Id main{DefineMain(ctx, program)};
|
||||||
DefineEntryPoint(program, ctx, main);
|
DefineEntryPoint(program, ctx, main);
|
||||||
if (program.info.stage == Stage::Vertex) {
|
if (program.info.stage == Stage::Vertex) {
|
||||||
|
|
|
@ -4,18 +4,12 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "shader_recompiler/backend/bindings.h"
|
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/profile.h"
|
#include "shader_recompiler/profile.h"
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program,
|
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program,
|
||||||
Bindings& bindings);
|
u32& binding);
|
||||||
|
|
||||||
[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
|
|
||||||
Bindings binding;
|
|
||||||
return EmitSPIRV(profile, program, binding);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -61,14 +61,11 @@ Id EmitReadConst(EmitContext& ctx) {
|
||||||
throw LogicError("Unreachable instruction");
|
throw LogicError("Unreachable instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& binding, const IR::Value& addr,
|
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||||
const IR::Value& offset) {
|
const Id buffer = ctx.buffers[handle];
|
||||||
throw LogicError("Unreachable instruction");
|
const Id type = ctx.info.buffers[handle].is_storage ? ctx.storage_f32 : ctx.uniform_f32;
|
||||||
}
|
const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)};
|
||||||
|
return ctx.OpLoad(ctx.F32[1], ptr);
|
||||||
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& addr,
|
|
||||||
const IR::Value& offset) {
|
|
||||||
throw LogicError("Unreachable instruction");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
|
@ -99,32 +96,28 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
||||||
ctx.OpStore(pointer, value);
|
ctx.OpStore(pointer, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||||
const IR::Value& address) {
|
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||||
const IR::Value& address) {
|
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||||
const IR::Value& address) {
|
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||||
const IR::Value& address) {
|
|
||||||
const auto info = inst->Flags<IR::BufferInstInfo>();
|
const auto info = inst->Flags<IR::BufferInstInfo>();
|
||||||
const Id buffer = ctx.buffers[handle.U32()];
|
const Id buffer = ctx.buffers[handle];
|
||||||
const Id type = ctx.info.buffers[handle.U32()].is_storage ? ctx.storage_f32 : ctx.uniform_f32;
|
const Id type = ctx.info.buffers[handle].is_storage ? ctx.storage_f32 : ctx.uniform_f32;
|
||||||
if (info.index_enable && info.offset_enable) {
|
if (info.index_enable && info.offset_enable) {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
} else if (info.index_enable) {
|
} else if (info.index_enable) {
|
||||||
boost::container::static_vector<Id, 4> ids;
|
boost::container::static_vector<Id, 4> ids;
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
const Id index{ctx.OpIAdd(ctx.U32[1], ctx.Def(address), ctx.ConstU32(i))};
|
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
|
||||||
const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)};
|
const Id ptr{ctx.OpAccessChain(type, buffer, ctx.ConstU32(0U), index)};
|
||||||
ids.push_back(ctx.OpLoad(ctx.F32[1], ptr));
|
ids.push_back(ctx.OpLoad(ctx.F32[1], ptr));
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,9 +6,14 @@
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||||
Id bias_lc, const IR::Value& offset) {
|
Id offset) {
|
||||||
throw NotImplementedException("SPIR-V Instruction");
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
|
const auto info = inst->Flags<IR::TextureInstInfo>();
|
||||||
|
return ctx.OpImageSampleImplicitLod(ctx.F32[4], sampled_image, coords);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
|
|
|
@ -43,18 +43,11 @@ void EmitSetGotoVariable(EmitContext& ctx);
|
||||||
void EmitGetGotoVariable(EmitContext& ctx);
|
void EmitGetGotoVariable(EmitContext& ctx);
|
||||||
void EmitSetScc(EmitContext& ctx);
|
void EmitSetScc(EmitContext& ctx);
|
||||||
Id EmitReadConst(EmitContext& ctx);
|
Id EmitReadConst(EmitContext& ctx);
|
||||||
Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
|
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
|
||||||
const IR::Value& offset);
|
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||||
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
|
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||||
const IR::Value& offset);
|
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||||
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||||
const IR::Value& address);
|
|
||||||
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
|
||||||
const IR::Value& address);
|
|
||||||
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
|
||||||
const IR::Value& address);
|
|
||||||
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, const IR::Value& handle,
|
|
||||||
const IR::Value& address);
|
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||||
|
@ -319,8 +312,8 @@ Id EmitConvertF64U16(EmitContext& ctx, Id value);
|
||||||
Id EmitConvertF64U32(EmitContext& ctx, Id value);
|
Id EmitConvertF64U32(EmitContext& ctx, Id value);
|
||||||
Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
||||||
|
|
||||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc,
|
||||||
Id bias_lc, const IR::Value& offset);
|
Id offset);
|
||||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||||
Id lod, const IR::Value& offset);
|
Id lod, const IR::Value& offset);
|
||||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||||
|
|
|
@ -35,17 +35,14 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings)
|
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding_)
|
||||||
: Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_},
|
: Sirit::Module(profile_.supported_spirv), info{program.info}, profile{profile_},
|
||||||
stage{program.info.stage} {
|
stage{program.info.stage}, binding{binding_} {
|
||||||
u32& uniform_binding{bindings.unified};
|
|
||||||
u32& storage_binding{bindings.unified};
|
|
||||||
u32& texture_binding{bindings.unified};
|
|
||||||
u32& image_binding{bindings.unified};
|
|
||||||
AddCapability(spv::Capability::Shader);
|
AddCapability(spv::Capability::Shader);
|
||||||
DefineArithmeticTypes();
|
DefineArithmeticTypes();
|
||||||
DefineInterfaces(program);
|
DefineInterfaces(program);
|
||||||
DefineBuffers(program.info);
|
DefineBuffers(program.info);
|
||||||
|
DefineImagesAndSamplers(program.info);
|
||||||
}
|
}
|
||||||
|
|
||||||
EmitContext::~EmitContext() = default;
|
EmitContext::~EmitContext() = default;
|
||||||
|
@ -235,16 +232,15 @@ void EmitContext::DefineOutputs(const Info& info) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineBuffers(const Info& info) {
|
void EmitContext::DefineBuffers(const Info& info) {
|
||||||
const auto define_buffer = [&](const BufferResource& buffer, Id type, u32 element_size,
|
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||||
char type_char, u32 index) {
|
ASSERT(True(buffer.used_types & IR::Type::F32));
|
||||||
ASSERT(buffer.stride % element_size == 0);
|
ASSERT(buffer.stride % sizeof(float) == 0);
|
||||||
const u32 num_elements = buffer.stride * buffer.num_records / element_size;
|
const u32 num_elements = buffer.stride * buffer.num_records / sizeof(float);
|
||||||
const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))};
|
const Id record_array_type{TypeArray(F32[1], ConstU32(num_elements))};
|
||||||
Decorate(record_array_type, spv::Decoration::ArrayStride, element_size);
|
|
||||||
|
|
||||||
const Id struct_type{TypeStruct(record_array_type)};
|
const Id struct_type{TypeStruct(record_array_type)};
|
||||||
const auto name =
|
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
|
||||||
fmt::format("{}_cbuf_block_{}{}", stage, type_char, element_size * CHAR_BIT);
|
|
||||||
|
const auto name = fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
|
||||||
Name(struct_type, name);
|
Name(struct_type, name);
|
||||||
Decorate(struct_type, spv::Decoration::Block);
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
MemberName(struct_type, 0, "data");
|
MemberName(struct_type, 0, "data");
|
||||||
|
@ -254,25 +250,112 @@ void EmitContext::DefineBuffers(const Info& info) {
|
||||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||||
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
|
||||||
if (buffer.is_storage) {
|
if (buffer.is_storage) {
|
||||||
storage_f32 = TypePointer(storage_class, type);
|
storage_f32 = TypePointer(storage_class, F32[1]);
|
||||||
} else {
|
} else {
|
||||||
uniform_f32 = TypePointer(storage_class, type);
|
uniform_f32 = TypePointer(storage_class, F32[1]);
|
||||||
}
|
}
|
||||||
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
|
||||||
Decorate(id, spv::Decoration::Binding, binding);
|
Decorate(id, spv::Decoration::Binding, binding);
|
||||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||||
Name(id, fmt::format("c{}", index));
|
Name(id, fmt::format("c{}", i));
|
||||||
|
|
||||||
binding++;
|
binding++;
|
||||||
buffers.push_back(id);
|
buffers.push_back(id);
|
||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
};
|
|
||||||
|
|
||||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
|
||||||
ASSERT(True(buffer.used_types & IR::Type::F32));
|
|
||||||
define_buffer(buffer, F32[1], 4, 'f', i);
|
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the SPIR-V image type for `desc` using `ctx.F32[1]` as the sampled type,
/// an unknown image format, `Sampled = 1` and a read-only access qualifier.
///
/// The dimensionality, arrayed flag and multisample flag are selected from
/// `desc.type`; the depth flag is taken from `desc.is_depth`.
///
/// @throws InvalidArgument for buffer images and for any type not handled below.
///
/// NOTE(review): the SPIR-V access qualifier operand is listed under the Kernel
/// capability — confirm it is intended for a module that declares Shader.
Id ImageType(EmitContext& ctx, const ImageResource& desc) {
    spv::Dim dim{};
    bool arrayed = false;
    bool multisampled = false;

    switch (desc.type) {
    case AmdGpu::ImageType::Color1D:
        dim = spv::Dim::Dim1D;
        break;
    case AmdGpu::ImageType::Color1DArray:
        dim = spv::Dim::Dim1D;
        arrayed = true;
        break;
    case AmdGpu::ImageType::Color2D:
        dim = spv::Dim::Dim2D;
        break;
    case AmdGpu::ImageType::Color2DMsaa:
        dim = spv::Dim::Dim2D;
        multisampled = true;
        break;
    case AmdGpu::ImageType::Color2DArray:
        dim = spv::Dim::Dim2D;
        arrayed = true;
        break;
    case AmdGpu::ImageType::Color2DMsaaArray:
        dim = spv::Dim::Dim2D;
        arrayed = true;
        multisampled = true;
        break;
    case AmdGpu::ImageType::Color3D:
        dim = spv::Dim::Dim3D;
        break;
    case AmdGpu::ImageType::Cube:
        dim = spv::Dim::Cube;
        break;
    case AmdGpu::ImageType::Buffer:
    default:
        // Buffer images and unknown values are rejected before any type is declared.
        throw InvalidArgument("Invalid texture type {}", desc.type);
    }

    return ctx.TypeImage(ctx.F32[1], dim, desc.is_depth, arrayed, multisampled, 1,
                         spv::ImageFormat::Unknown, spv::AccessQualifier::ReadOnly);
}
|
||||||
|
|
||||||
|
/// Builds the SPIR-V image type for an image resource that will be sampled.
///
/// @param ctx          Emit context the type is declared in.
/// @param desc         Image resource description; only `desc.type` is consulted.
/// @param sampled_type Component type produced when the image is sampled.
/// @return Id of the declared image type (non-depth, non-multisampled, unknown format).
/// @throws NotImplementedException for buffer images.
/// @throws InvalidArgument for any other image type not handled below.
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
    const auto format = spv::ImageFormat::Unknown; // Read this from tsharp?
    // DefineImagesAndSamplers wraps every type built here in TypeSampledImage, and
    // the result is combined with a sampler via OpSampledImage. SPIR-V only allows
    // that for images declared with Sampled = 0 or 1, so all cases (including 3D,
    // which previously used the storage-image value 2) use 1.
    constexpr int sampled = 1;
    switch (desc.type) {
    case AmdGpu::ImageType::Color1D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
    case AmdGpu::ImageType::Color1DArray:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, sampled, format);
    case AmdGpu::ImageType::Color2D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, sampled, format);
    case AmdGpu::ImageType::Color2DArray:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, sampled, format);
    case AmdGpu::ImageType::Color3D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
    case AmdGpu::ImageType::Buffer:
        throw NotImplementedException("Image buffer");
    default:
        break;
    }
    throw InvalidArgument("Invalid texture type {}", desc.type);
}
|
||||||
|
|
||||||
|
void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
||||||
|
for (const auto& image_desc : info.images) {
|
||||||
|
const Id sampled_type{image_desc.nfmt == AmdGpu::NumberFormat::Uint ? U32[1] : F32[1]};
|
||||||
|
const Id image_type{ImageType(*this, image_desc, sampled_type)};
|
||||||
|
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
|
||||||
|
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
|
||||||
|
Decorate(id, spv::Decoration::Binding, binding);
|
||||||
|
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||||
|
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
|
||||||
|
image_desc.dword_offset));
|
||||||
|
images.push_back({
|
||||||
|
.id = id,
|
||||||
|
.sampled_type = TypeSampledImage(image_type),
|
||||||
|
.pointer_type = pointer_type,
|
||||||
|
.image_type = image_type,
|
||||||
|
});
|
||||||
|
interfaces.push_back(id);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.samplers.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
sampler_type = TypeSampler();
|
||||||
|
sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
|
||||||
|
for (const auto& samp_desc : info.samplers) {
|
||||||
|
const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
|
||||||
|
Decorate(id, spv::Decoration::Binding, binding);
|
||||||
|
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||||
|
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base,
|
||||||
|
samp_desc.dword_offset));
|
||||||
|
samplers.push_back(id);
|
||||||
|
interfaces.push_back(id);
|
||||||
|
++binding;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <sirit/sirit.h>
|
#include <sirit/sirit.h>
|
||||||
|
|
||||||
#include "shader_recompiler/backend/bindings.h"
|
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/profile.h"
|
#include "shader_recompiler/profile.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
@ -29,7 +28,7 @@ struct VectorIds {
|
||||||
|
|
||||||
class EmitContext final : public Sirit::Module {
|
class EmitContext final : public Sirit::Module {
|
||||||
public:
|
public:
|
||||||
explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding);
|
explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
|
||||||
~EmitContext();
|
~EmitContext();
|
||||||
|
|
||||||
Id Def(const IR::Value& value);
|
Id Def(const IR::Value& value);
|
||||||
|
@ -152,8 +151,20 @@ public:
|
||||||
Id base_vertex{};
|
Id base_vertex{};
|
||||||
std::array<Id, 8> frag_color{};
|
std::array<Id, 8> frag_color{};
|
||||||
|
|
||||||
u32 binding{};
|
struct TextureDefinition {
|
||||||
|
Id id;
|
||||||
|
Id sampled_type;
|
||||||
|
Id pointer_type;
|
||||||
|
Id image_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
u32& binding;
|
||||||
boost::container::small_vector<Id, 4> buffers;
|
boost::container::small_vector<Id, 4> buffers;
|
||||||
|
boost::container::small_vector<TextureDefinition, 4> images;
|
||||||
|
boost::container::small_vector<Id, 4> samplers;
|
||||||
|
|
||||||
|
Id sampler_type{};
|
||||||
|
Id sampler_pointer_type{};
|
||||||
|
|
||||||
struct SpirvAttribute {
|
struct SpirvAttribute {
|
||||||
Id id;
|
Id id;
|
||||||
|
@ -170,6 +181,7 @@ private:
|
||||||
void DefineInputs(const Info& info);
|
void DefineInputs(const Info& info);
|
||||||
void DefineOutputs(const Info& info);
|
void DefineOutputs(const Info& info);
|
||||||
void DefineBuffers(const Info& info);
|
void DefineBuffers(const Info& info);
|
||||||
|
void DefineImagesAndSamplers(const Info& info);
|
||||||
|
|
||||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||||
};
|
};
|
||||||
|
|
|
@ -8,15 +8,18 @@ namespace Shader::Gcn {
|
||||||
void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg,
|
void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg,
|
||||||
const IR::U32U64& address) {
|
const IR::U32U64& address) {
|
||||||
for (u32 i = 0; i < num_dwords; i++) {
|
for (u32 i = 0; i < num_dwords; i++) {
|
||||||
const IR::U32 value = handle.IsEmpty() ? ir.ReadConst(address, ir.Imm32(i))
|
if (handle.IsEmpty()) {
|
||||||
: ir.ReadConstBuffer(handle, address, ir.Imm32(i));
|
ir.SetScalarReg(dst_reg++, ir.ReadConst(address, ir.Imm32(i)));
|
||||||
ir.SetScalarReg(dst_reg++, value);
|
} else {
|
||||||
|
const IR::U32 index = ir.IAdd(address, ir.Imm32(i));
|
||||||
|
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(handle, index));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||||
const auto& smrd = inst.control.smrd;
|
const auto& smrd = inst.control.smrd;
|
||||||
const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2);
|
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||||
const IR::U32 offset =
|
const IR::U32 offset =
|
||||||
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
||||||
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
||||||
|
@ -30,14 +33,12 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||||
|
|
||||||
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||||
const auto& smrd = inst.control.smrd;
|
const auto& smrd = inst.control.smrd;
|
||||||
const IR::ScalarReg sbase = IR::ScalarReg(inst.src[0].code * 2);
|
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||||
const IR::U32 offset =
|
const IR::U32 offset =
|
||||||
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
||||||
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
||||||
ir.Imm32(2))};
|
ir.Imm32(2))};
|
||||||
const IR::Value vsharp =
|
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
||||||
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
|
|
||||||
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
|
|
||||||
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||||
Load(ir, num_dwords, vsharp, dst_reg, offset);
|
Load(ir, num_dwords, vsharp, dst_reg, offset);
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,8 +73,14 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||||
return ir.Imm32(1.f);
|
return ir.Imm32(1.f);
|
||||||
case OperandField::ConstFloatPos_0_5:
|
case OperandField::ConstFloatPos_0_5:
|
||||||
return ir.Imm32(0.5f);
|
return ir.Imm32(0.5f);
|
||||||
|
case OperandField::ConstFloatPos_2_0:
|
||||||
|
return ir.Imm32(2.0f);
|
||||||
|
case OperandField::ConstFloatPos_4_0:
|
||||||
|
return ir.Imm32(4.0f);
|
||||||
case OperandField::ConstFloatNeg_0_5:
|
case OperandField::ConstFloatNeg_0_5:
|
||||||
return ir.Imm32(-0.5f);
|
return ir.Imm32(-0.5f);
|
||||||
|
case OperandField::ConstFloatNeg_1_0:
|
||||||
|
return ir.Imm32(-1.0f);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
@ -135,6 +141,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::S_MUL_I32:
|
case Opcode::S_MUL_I32:
|
||||||
translator.S_MUL_I32(inst);
|
translator.S_MUL_I32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_MAD_F32:
|
||||||
|
translator.V_MAD_F32(inst);
|
||||||
|
break;
|
||||||
case Opcode::V_MOV_B32:
|
case Opcode::V_MOV_B32:
|
||||||
translator.V_MOV(inst);
|
translator.V_MOV(inst);
|
||||||
break;
|
break;
|
||||||
|
@ -144,12 +153,39 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::V_MUL_F32:
|
case Opcode::V_MUL_F32:
|
||||||
translator.V_MUL_F32(inst);
|
translator.V_MUL_F32(inst);
|
||||||
break;
|
break;
|
||||||
|
case Opcode::V_AND_B32:
|
||||||
|
translator.V_AND_B32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_LSHLREV_B32:
|
||||||
|
translator.V_LSHLREV_B32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_ADD_I32:
|
||||||
|
translator.V_ADD_I32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_CVT_F32_I32:
|
||||||
|
translator.V_CVT_F32_I32(inst);
|
||||||
|
break;
|
||||||
|
case Opcode::V_CVT_F32_U32:
|
||||||
|
translator.V_CVT_F32_U32(inst);
|
||||||
|
break;
|
||||||
case Opcode::S_SWAPPC_B64:
|
case Opcode::S_SWAPPC_B64:
|
||||||
ASSERT(info.stage == Stage::Vertex);
|
ASSERT(info.stage == Stage::Vertex);
|
||||||
translator.EmitFetch(inst);
|
translator.EmitFetch(inst);
|
||||||
break;
|
break;
|
||||||
case Opcode::S_WAITCNT:
|
case Opcode::S_WAITCNT:
|
||||||
break; // Ignore for now.
|
break;
|
||||||
|
case Opcode::S_BUFFER_LOAD_DWORD:
|
||||||
|
translator.S_BUFFER_LOAD_DWORD(1, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::S_BUFFER_LOAD_DWORDX2:
|
||||||
|
translator.S_BUFFER_LOAD_DWORD(2, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::S_BUFFER_LOAD_DWORDX4:
|
||||||
|
translator.S_BUFFER_LOAD_DWORD(4, inst);
|
||||||
|
break;
|
||||||
|
case Opcode::S_BUFFER_LOAD_DWORDX8:
|
||||||
|
translator.S_BUFFER_LOAD_DWORD(8, inst);
|
||||||
|
break;
|
||||||
case Opcode::S_BUFFER_LOAD_DWORDX16:
|
case Opcode::S_BUFFER_LOAD_DWORDX16:
|
||||||
translator.S_BUFFER_LOAD_DWORD(16, inst);
|
translator.S_BUFFER_LOAD_DWORD(16, inst);
|
||||||
break;
|
break;
|
||||||
|
@ -180,7 +216,8 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
||||||
case Opcode::S_ENDPGM:
|
case Opcode::S_ENDPGM:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unknown opcode {}", u32(inst.opcode));
|
const u32 opcode = u32(inst.opcode);
|
||||||
|
UNREACHABLE_MSG("Unknown opcode {}", opcode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,6 +47,12 @@ public:
|
||||||
void V_MUL_F32(const GcnInst& inst);
|
void V_MUL_F32(const GcnInst& inst);
|
||||||
void V_CMP_EQ_U32(const GcnInst& inst);
|
void V_CMP_EQ_U32(const GcnInst& inst);
|
||||||
void V_CNDMASK_B32(const GcnInst& inst);
|
void V_CNDMASK_B32(const GcnInst& inst);
|
||||||
|
void V_AND_B32(const GcnInst& inst);
|
||||||
|
void V_LSHLREV_B32(const GcnInst& inst);
|
||||||
|
void V_ADD_I32(const GcnInst& inst);
|
||||||
|
void V_CVT_F32_I32(const GcnInst& inst);
|
||||||
|
void V_CVT_F32_U32(const GcnInst& inst);
|
||||||
|
void V_MAD_F32(const GcnInst& inst);
|
||||||
|
|
||||||
// Vector Memory
|
// Vector Memory
|
||||||
void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);
|
void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
#pragma clang optimize off
|
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
@ -61,4 +61,45 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
|
||||||
ir.SetVectorReg(dst_reg, IR::U32F32{result});
|
ir.SetVectorReg(dst_reg, IR::U32F32{result});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_AND_B32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
||||||
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
|
ir.SetVectorReg(dst_reg, ir.BitwiseAnd(src0, src1));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_LSHLREV_B32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
||||||
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
|
ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_ADD_I32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
|
||||||
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
|
ir.SetVectorReg(dst_reg, ir.IAdd(src0, src1));
|
||||||
|
// TODO: Carry
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_CVT_F32_I32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
|
ir.SetVectorReg(dst_reg, ir.ConvertSToF(32, 32, src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_CVT_F32_U32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
|
ir.SetVectorReg(dst_reg, ir.ConvertUToF(32, 32, src0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::V_MAD_F32(const GcnInst& inst) {
|
||||||
|
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||||
|
const IR::F32 src2{GetSrc(inst.src[2])};
|
||||||
|
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -63,27 +63,34 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
||||||
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
||||||
// in coords field of the instruction. Then the resource tracking pass will patch the
|
// in coords field of the instruction. Then the resource tracking pass will patch the
|
||||||
// IR instruction to fill in lod_clamp field. The vector can also be used
|
// IR instruction to fill in lod_clamp field.
|
||||||
// as coords directly as SPIR-V will ignore any extra parameters.
|
const IR::Value body = ir.CompositeConstruct(
|
||||||
const IR::Value body =
|
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
||||||
ir.CompositeConstruct(ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++),
|
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
||||||
ir.GetVectorReg(addr_reg++), ir.GetVectorReg(addr_reg++));
|
|
||||||
|
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
||||||
|
|
||||||
|
IR::TextureInstInfo info{};
|
||||||
|
info.is_depth.Assign(flags.test(MimgModifier::Pcf));
|
||||||
|
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||||
|
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||||
|
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||||
|
info.explicit_lod.Assign(explicit_lod);
|
||||||
|
|
||||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||||
const IR::Value texel = [&]() -> IR::Value {
|
const IR::Value texel = [&]() -> IR::Value {
|
||||||
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
|
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
|
||||||
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
|
||||||
if (!flags.test(MimgModifier::Pcf)) {
|
if (!flags.test(MimgModifier::Pcf)) {
|
||||||
if (explicit_lod) {
|
if (explicit_lod) {
|
||||||
return ir.ImageSampleExplicitLod(handle, body, lod, offset, {});
|
return ir.ImageSampleExplicitLod(handle, body, lod, offset, info);
|
||||||
} else {
|
} else {
|
||||||
return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, {});
|
return ir.ImageSampleImplicitLod(handle, body, bias, offset, {}, info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (explicit_lod) {
|
if (explicit_lod) {
|
||||||
return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, {});
|
return ir.ImageSampleDrefExplicitLod(handle, body, dref, lod, offset, info);
|
||||||
}
|
}
|
||||||
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, {});
|
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, {}, info);
|
||||||
}();
|
}();
|
||||||
|
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
|
|
|
@ -110,6 +110,8 @@ std::string NameOf(Attribute attribute) {
|
||||||
return "InstanceId";
|
return "InstanceId";
|
||||||
case Attribute::FragCoord:
|
case Attribute::FragCoord:
|
||||||
return "FragCoord";
|
return "FragCoord";
|
||||||
|
case Attribute::IsFrontFace:
|
||||||
|
return "IsFrontFace";
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -227,14 +227,8 @@ U32 IREmitter::ReadConst(const U64& address, const U32& offset) {
|
||||||
return Inst<U32>(Opcode::ReadConst, address, offset);
|
return Inst<U32>(Opcode::ReadConst, address, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
|
||||||
U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) {
|
return Inst<F32>(Opcode::ReadConstBuffer, handle, index);
|
||||||
return Inst<U32>(Opcode::ReadConstBuffer, handle, index, offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index, const U32& offset) {
|
|
||||||
return Inst<F32>(Opcode::ReadConstBufferF32, handle, index, offset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||||
|
|
|
@ -68,8 +68,7 @@ public:
|
||||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||||
|
|
||||||
[[nodiscard]] U32 ReadConst(const U64& address, const U32& offset);
|
[[nodiscard]] U32 ReadConst(const U64& address, const U32& offset);
|
||||||
template <typename T = U32>
|
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||||
[[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset);
|
|
||||||
|
|
||||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||||
BufferInstInfo info);
|
BufferInstInfo info);
|
||||||
|
|
|
@ -15,8 +15,7 @@ OPCODE(Epilogue, Void,
|
||||||
|
|
||||||
// Constant memory operations
|
// Constant memory operations
|
||||||
OPCODE(ReadConst, U32, U64, U32, )
|
OPCODE(ReadConst, U32, U64, U32, )
|
||||||
OPCODE(ReadConstBuffer, U32, Opaque, U32, U32 )
|
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||||
OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 )
|
|
||||||
|
|
||||||
// Context getters/setters
|
// Context getters/setters
|
||||||
OPCODE(GetUserData, U32, ScalarReg, )
|
OPCODE(GetUserData, U32, ScalarReg, )
|
||||||
|
|
|
@ -88,15 +88,15 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
||||||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if constexpr (op == IR::Opcode::BitCastF32U32) {
|
// if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||||
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
// if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||||
// Replace the bitcast with a typed constant buffer read
|
// // Replace the bitcast with a typed constant buffer read
|
||||||
inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
|
// inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
|
||||||
inst.SetArg(0, arg_inst->Arg(0));
|
// inst.SetArg(0, arg_inst->Arg(0));
|
||||||
inst.SetArg(1, arg_inst->Arg(1));
|
// inst.SetArg(1, arg_inst->Arg(1));
|
||||||
return;
|
// return;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
|
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
|
||||||
|
|
|
@ -28,7 +28,6 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
||||||
case IR::Opcode::LoadBufferF32x3:
|
case IR::Opcode::LoadBufferF32x3:
|
||||||
case IR::Opcode::LoadBufferF32x4:
|
case IR::Opcode::LoadBufferF32x4:
|
||||||
case IR::Opcode::ReadConstBuffer:
|
case IR::Opcode::ReadConstBuffer:
|
||||||
case IR::Opcode::ReadConstBufferF32:
|
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
@ -41,6 +40,7 @@ IR::Type BufferLoadType(const IR::Inst& inst) {
|
||||||
case IR::Opcode::LoadBufferF32x2:
|
case IR::Opcode::LoadBufferF32x2:
|
||||||
case IR::Opcode::LoadBufferF32x3:
|
case IR::Opcode::LoadBufferF32x3:
|
||||||
case IR::Opcode::LoadBufferF32x4:
|
case IR::Opcode::LoadBufferF32x4:
|
||||||
|
case IR::Opcode::ReadConstBuffer:
|
||||||
return IR::Type::F32;
|
return IR::Type::F32;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
@ -69,8 +69,10 @@ bool IsImageInstruction(const IR::Inst& inst) {
|
||||||
|
|
||||||
class Descriptors {
|
class Descriptors {
|
||||||
public:
|
public:
|
||||||
explicit Descriptors(BufferResourceList& buffer_resources_)
|
explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_,
|
||||||
: buffer_resources{buffer_resources_} {}
|
SamplerResourceList& sampler_resources_)
|
||||||
|
: buffer_resources{buffer_resources_}, image_resources{image_resources_},
|
||||||
|
sampler_resources{sampler_resources_} {}
|
||||||
|
|
||||||
u32 Add(const BufferResource& desc) {
|
u32 Add(const BufferResource& desc) {
|
||||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||||
|
@ -84,6 +86,23 @@ public:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 Add(const ImageResource& desc) {
|
||||||
|
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
|
||||||
|
return desc.sgpr_base == existing.sgpr_base &&
|
||||||
|
desc.dword_offset == existing.dword_offset && desc.type == existing.type &&
|
||||||
|
desc.is_storage == existing.is_storage;
|
||||||
|
})};
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 Add(const SamplerResource& desc) {
|
||||||
|
const u32 index{Add(sampler_resources, desc, [&desc](const auto& existing) {
|
||||||
|
return desc.sgpr_base == existing.sgpr_base &&
|
||||||
|
desc.dword_offset == existing.dword_offset;
|
||||||
|
})};
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename Descriptors, typename Descriptor, typename Func>
|
template <typename Descriptors, typename Descriptor, typename Func>
|
||||||
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
|
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
|
||||||
|
@ -96,6 +115,8 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
BufferResourceList& buffer_resources;
|
BufferResourceList& buffer_resources;
|
||||||
|
ImageResourceList& image_resources;
|
||||||
|
SamplerResourceList& sampler_resources;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
@ -118,8 +139,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
||||||
|
|
||||||
// Retrieve SGPR that holds sbase
|
// Retrieve SGPR that holds sbase
|
||||||
inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
|
inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
|
||||||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetScalarRegister,
|
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported");
|
||||||
"Nested resource loads not supported");
|
|
||||||
const IR::ScalarReg base = inst->Arg(0).ScalarReg();
|
const IR::ScalarReg base = inst->Arg(0).ScalarReg();
|
||||||
|
|
||||||
// Return retrieved location.
|
// Return retrieved location.
|
||||||
|
@ -140,7 +160,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
.stride = u32(buffer.stride),
|
.stride = u32(buffer.stride),
|
||||||
.num_records = u32(buffer.num_records),
|
.num_records = u32(buffer.num_records),
|
||||||
.used_types = BufferLoadType(inst),
|
.used_types = BufferLoadType(inst),
|
||||||
.is_storage = buffer.base_address % 64 != 0,
|
.is_storage = /*buffer.base_address % 64 != 0*/ true,
|
||||||
});
|
});
|
||||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
@ -151,6 +171,9 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
|
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
|
||||||
}
|
}
|
||||||
|
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
// Calculate buffer address.
|
// Calculate buffer address.
|
||||||
const u32 dword_stride = buffer.stride / sizeof(u32);
|
const u32 dword_stride = buffer.stride / sizeof(u32);
|
||||||
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
||||||
|
@ -160,19 +183,79 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
} else if (inst_info.index_enable) {
|
} else if (inst_info.index_enable) {
|
||||||
const IR::U32 index{inst.Arg(1)};
|
const IR::U32 index{inst.Arg(1)};
|
||||||
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
||||||
|
} else if (inst_info.offset_enable) {
|
||||||
|
const IR::U32 offset{inst.Arg(1)};
|
||||||
}
|
}
|
||||||
inst.SetArg(1, address);
|
inst.SetArg(1, address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||||
|
IR::Inst* producer = inst.Arg(0).InstRecursive();
|
||||||
|
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||||
|
|
||||||
|
// Read image sharp.
|
||||||
|
const auto tsharp = TrackSharp(producer->Arg(0).InstRecursive());
|
||||||
|
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
|
||||||
|
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||||
|
const u32 image_binding = descriptors.Add(ImageResource{
|
||||||
|
.sgpr_base = tsharp.sgpr_base,
|
||||||
|
.dword_offset = tsharp.dword_offset,
|
||||||
|
.type = image.type,
|
||||||
|
.nfmt = static_cast<AmdGpu::NumberFormat>(image.num_format.Value()),
|
||||||
|
.is_storage = false,
|
||||||
|
.is_depth = bool(inst_info.is_depth),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Read sampler sharp.
|
||||||
|
const auto ssharp = TrackSharp(producer->Arg(1).InstRecursive());
|
||||||
|
const u32 sampler_binding = descriptors.Add(SamplerResource{
|
||||||
|
.sgpr_base = ssharp.sgpr_base,
|
||||||
|
.dword_offset = ssharp.dword_offset,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Patch image handle
|
||||||
|
const u32 handle = image_binding | (sampler_binding << 16);
|
||||||
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
inst.SetArg(0, ir.Imm32(handle));
|
||||||
|
|
||||||
|
// Now that we know the image type, adjust texture coordinate vector.
|
||||||
|
const IR::Inst* body = inst.Arg(1).InstRecursive();
|
||||||
|
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
|
||||||
|
switch (image.type) {
|
||||||
|
case AmdGpu::ImageType::Color1D:
|
||||||
|
return {body->Arg(0), body->Arg(1)};
|
||||||
|
case AmdGpu::ImageType::Color1DArray:
|
||||||
|
case AmdGpu::ImageType::Color2D:
|
||||||
|
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
|
||||||
|
case AmdGpu::ImageType::Color2DArray:
|
||||||
|
case AmdGpu::ImageType::Color3D:
|
||||||
|
case AmdGpu::ImageType::Cube:
|
||||||
|
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
inst.SetArg(1, coords);
|
||||||
|
|
||||||
|
if (inst_info.has_lod_clamp) {
|
||||||
|
// Final argument contains lod_clamp
|
||||||
|
const u32 arg_pos = inst_info.is_depth ? 5 : 4;
|
||||||
|
inst.SetArg(arg_pos, arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ResourceTrackingPass(IR::Program& program) {
|
void ResourceTrackingPass(IR::Program& program) {
|
||||||
auto& info = program.info;
|
auto& info = program.info;
|
||||||
Descriptors descriptors{info.buffers};
|
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||||
for (IR::Block* const block : program.post_order_blocks) {
|
for (IR::Block* const block : program.post_order_blocks) {
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
if (IsBufferInstruction(inst)) {
|
if (IsBufferInstruction(inst)) {
|
||||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (IsImageInstruction(inst)) {
|
||||||
|
PatchImageInstruction(*block, inst, info, descriptors);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,13 +33,11 @@ union Mode {
|
||||||
|
|
||||||
union TextureInstInfo {
|
union TextureInstInfo {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 16, u32> descriptor_index;
|
BitField<0, 1, u32> is_depth;
|
||||||
BitField<19, 1, u32> is_depth;
|
BitField<1, 1, u32> has_bias;
|
||||||
BitField<20, 1, u32> has_bias;
|
BitField<2, 1, u32> has_lod_clamp;
|
||||||
BitField<21, 1, u32> has_lod_clamp;
|
BitField<3, 1, u32> force_level0;
|
||||||
BitField<22, 1, u32> relaxed_precision;
|
BitField<4, 1, u32> explicit_lod;
|
||||||
BitField<23, 2, u32> gather_component;
|
|
||||||
BitField<25, 2, u32> num_derivatives;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
union BufferInstInfo {
|
union BufferInstInfo {
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <fstream>
|
|
||||||
#include "shader_recompiler/frontend/control_flow_graph.h"
|
#include "shader_recompiler/frontend/control_flow_graph.h"
|
||||||
#include "shader_recompiler/frontend/decode.h"
|
#include "shader_recompiler/frontend/decode.h"
|
||||||
#include "shader_recompiler/frontend/structured_control_flow.h"
|
#include "shader_recompiler/frontend/structured_control_flow.h"
|
||||||
|
@ -38,11 +37,6 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
|
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
|
||||||
Gcn::GcnDecodeContext decoder;
|
Gcn::GcnDecodeContext decoder;
|
||||||
|
|
||||||
static int counter = 0;
|
|
||||||
std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary);
|
|
||||||
file.write((const char*)token.data(), token.size_bytes());
|
|
||||||
file.close();
|
|
||||||
|
|
||||||
// Decode and save instructions
|
// Decode and save instructions
|
||||||
IR::Program program;
|
IR::Program program;
|
||||||
program.ins_list.reserve(token.size());
|
program.ins_list.reserve(token.size());
|
||||||
|
@ -71,7 +65,6 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
for (const auto& block : program.blocks) {
|
for (const auto& block : program.blocks) {
|
||||||
fmt::print("{}\n", IR::DumpBlock(*block));
|
fmt::print("{}\n", IR::DumpBlock(*block));
|
||||||
}
|
}
|
||||||
std::fflush(stdout);
|
|
||||||
|
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,25 +9,6 @@
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
struct BinaryInfo {
|
|
||||||
u8 signature[7];
|
|
||||||
u8 version;
|
|
||||||
u32 pssl_or_cg : 1;
|
|
||||||
u32 cached : 1;
|
|
||||||
u32 type : 4;
|
|
||||||
u32 source_type : 2;
|
|
||||||
u32 length : 24;
|
|
||||||
u8 chunk_usage_base_offset_in_dw;
|
|
||||||
u8 num_input_usage_slots;
|
|
||||||
u8 is_srt : 1;
|
|
||||||
u8 is_srt_used_info_valid : 1;
|
|
||||||
u8 is_extended_usage_info : 1;
|
|
||||||
u8 reserved2 : 5;
|
|
||||||
u8 reserved3;
|
|
||||||
u64 shader_hash;
|
|
||||||
u32 crc32;
|
|
||||||
};
|
|
||||||
|
|
||||||
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
|
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
|
||||||
ObjectPool<IR::Block>& block_pool,
|
ObjectPool<IR::Block>& block_pool,
|
||||||
std::span<const u32> code, const Info&& info);
|
std::span<const u32> code, const Info&& info);
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#include "shader_recompiler/ir/attribute.h"
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
#include "shader_recompiler/ir/type.h"
|
#include "shader_recompiler/ir/type.h"
|
||||||
#include "video_core/amdgpu/pixel_format.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
|
@ -53,6 +53,22 @@ struct BufferResource {
|
||||||
};
|
};
|
||||||
using BufferResourceList = boost::container::static_vector<BufferResource, 8>;
|
using BufferResourceList = boost::container::static_vector<BufferResource, 8>;
|
||||||
|
|
||||||
|
struct ImageResource {
|
||||||
|
u32 sgpr_base;
|
||||||
|
u32 dword_offset;
|
||||||
|
AmdGpu::ImageType type;
|
||||||
|
AmdGpu::NumberFormat nfmt;
|
||||||
|
bool is_storage;
|
||||||
|
bool is_depth;
|
||||||
|
};
|
||||||
|
using ImageResourceList = boost::container::static_vector<ImageResource, 8>;
|
||||||
|
|
||||||
|
struct SamplerResource {
|
||||||
|
u32 sgpr_base;
|
||||||
|
u32 dword_offset;
|
||||||
|
};
|
||||||
|
using SamplerResourceList = boost::container::static_vector<SamplerResource, 8>;
|
||||||
|
|
||||||
struct Info {
|
struct Info {
|
||||||
struct VsInput {
|
struct VsInput {
|
||||||
AmdGpu::NumberFormat fmt;
|
AmdGpu::NumberFormat fmt;
|
||||||
|
@ -101,6 +117,9 @@ struct Info {
|
||||||
AttributeFlags stores{};
|
AttributeFlags stores{};
|
||||||
|
|
||||||
BufferResourceList buffers;
|
BufferResourceList buffers;
|
||||||
|
ImageResourceList images;
|
||||||
|
SamplerResourceList samplers;
|
||||||
|
|
||||||
std::span<const u32> user_data;
|
std::span<const u32> user_data;
|
||||||
Stage stage;
|
Stage stage;
|
||||||
|
|
||||||
|
|
|
@ -63,6 +63,16 @@ struct Liverpool {
|
||||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||||
return reinterpret_cast<const T*>(addr);
|
return reinterpret_cast<const T*>(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::span<const u32> Code() const {
|
||||||
|
u32 code_size = 0;
|
||||||
|
const u32* code = Address<u32>();
|
||||||
|
static constexpr std::string_view PostHeader = "OrbShdr";
|
||||||
|
while (std::memcmp(code + code_size, PostHeader.data(), PostHeader.size()) != 0) {
|
||||||
|
code_size++;
|
||||||
|
}
|
||||||
|
return std::span{code, code_size};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
union PsInputControl {
|
union PsInputControl {
|
||||||
|
@ -228,7 +238,7 @@ struct Liverpool {
|
||||||
enum class ZFormat : u32 {
|
enum class ZFormat : u32 {
|
||||||
Invald = 0,
|
Invald = 0,
|
||||||
Z16 = 1,
|
Z16 = 1,
|
||||||
Z32Float = 2,
|
Z32Float = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class StencilFormat : u32 {
|
enum class StencilFormat : u32 {
|
||||||
|
@ -353,8 +363,9 @@ struct Liverpool {
|
||||||
BitField<0, 8, u32> base_addr_hi;
|
BitField<0, 8, u32> base_addr_hi;
|
||||||
u32 base_addr_lo;
|
u32 base_addr_lo;
|
||||||
|
|
||||||
VAddr Address() const {
|
template <typename T = VAddr>
|
||||||
return base_addr_lo | u64(base_addr_hi) << 32;
|
T Address() const {
|
||||||
|
return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -448,6 +459,53 @@ struct Liverpool {
|
||||||
u32 data_w;
|
u32 data_w;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct BlendConstants {
|
||||||
|
float red;
|
||||||
|
float green;
|
||||||
|
float blue;
|
||||||
|
float alpha;
|
||||||
|
};
|
||||||
|
|
||||||
|
union BlendControl {
|
||||||
|
enum class BlendFactor : u32 {
|
||||||
|
Zero = 0,
|
||||||
|
One = 1,
|
||||||
|
SrcColor = 2,
|
||||||
|
OneMinusSrcColor = 3,
|
||||||
|
SrcAlpha = 4,
|
||||||
|
OneMinusSrcAlpha = 5,
|
||||||
|
DstAlpha = 6,
|
||||||
|
OneMinusDstAlpha = 7,
|
||||||
|
DstColor = 8,
|
||||||
|
OneMinusDstColor = 9,
|
||||||
|
SrcAlphaSaturate = 10,
|
||||||
|
ConstantColor = 13,
|
||||||
|
OneMinusConstantColor = 14,
|
||||||
|
Src1Color = 15,
|
||||||
|
InvSrc1Color = 16,
|
||||||
|
Src1Alpha = 17,
|
||||||
|
InvSrc1Alpha = 18,
|
||||||
|
ConstantAlpha = 19,
|
||||||
|
OneMinusConstantAlpha = 20,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class BlendFunc : u32 {
|
||||||
|
Add = 0,
|
||||||
|
Subtract = 1,
|
||||||
|
Min = 2,
|
||||||
|
Max = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
BitField<0, 5, BlendFactor> color_src_factor;
|
||||||
|
BitField<5, 3, BlendFunc> color_func;
|
||||||
|
BitField<8, 5, BlendFactor> color_dst_factor;
|
||||||
|
BitField<16, 5, BlendFactor> alpha_src_factor;
|
||||||
|
BitField<21, 3, BlendFunc> alpha_func;
|
||||||
|
BitField<24, 5, BlendFactor> alpha_dst_factor;
|
||||||
|
BitField<29, 1, u32> separate_alpha_blend;
|
||||||
|
BitField<30, 1, u32> enable;
|
||||||
|
};
|
||||||
|
|
||||||
struct ColorBuffer {
|
struct ColorBuffer {
|
||||||
enum class EndianSwap : u32 {
|
enum class EndianSwap : u32 {
|
||||||
None = 0,
|
None = 0,
|
||||||
|
@ -577,7 +635,9 @@ struct Liverpool {
|
||||||
INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2);
|
INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2);
|
||||||
std::array<ViewportScissor, NumViewports> viewport_scissors;
|
std::array<ViewportScissor, NumViewports> viewport_scissors;
|
||||||
std::array<ViewportDepth, NumViewports> viewport_depths;
|
std::array<ViewportDepth, NumViewports> viewport_depths;
|
||||||
INSERT_PADDING_WORDS(0xA10B - 0xA0D4);
|
INSERT_PADDING_WORDS(0xA105 - 0xA0D4);
|
||||||
|
BlendConstants blend_constants;
|
||||||
|
INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4);
|
||||||
StencilControl stencil_control;
|
StencilControl stencil_control;
|
||||||
StencilRefMask stencil_ref_front;
|
StencilRefMask stencil_ref_front;
|
||||||
StencilRefMask stencil_ref_back;
|
StencilRefMask stencil_ref_back;
|
||||||
|
@ -593,7 +653,9 @@ struct Liverpool {
|
||||||
ShaderPosFormat shader_pos_format;
|
ShaderPosFormat shader_pos_format;
|
||||||
ShaderExportFormat z_export_format;
|
ShaderExportFormat z_export_format;
|
||||||
ColorExportFormat color_export_format;
|
ColorExportFormat color_export_format;
|
||||||
INSERT_PADDING_WORDS(0xA1F9 - 0xA1C3 - 3);
|
INSERT_PADDING_WORDS(0xA1E0 - 0xA1C3 - 3);
|
||||||
|
std::array<BlendControl, NumColorBuffers> blend_control;
|
||||||
|
INSERT_PADDING_WORDS(0xA1F9 - 0xA1E0 - 8);
|
||||||
IndexBufferBase index_base_address;
|
IndexBufferBase index_base_address;
|
||||||
INSERT_PADDING_WORDS(1);
|
INSERT_PADDING_WORDS(1);
|
||||||
u32 draw_initiator;
|
u32 draw_initiator;
|
||||||
|
@ -732,6 +794,7 @@ static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
|
||||||
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
|
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
|
||||||
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
|
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
|
||||||
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
|
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
|
||||||
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
|
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
|
||||||
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
|
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
|
||||||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||||
|
|
|
@ -31,4 +31,196 @@ struct Buffer {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Raw encodings of the image resource `type` field. Values 1..7 are not named.
enum class ImageType : u64 {
    Buffer = 0,
    // Non-array colour images.
    Color1D = 8,
    Color2D = 9,
    Color3D = 10,
    Cube = 11,
    // Array variants.
    Color1DArray = 12,
    Color2DArray = 13,
    // Multisampled variants.
    Color2DMsaa = 14,
    Color2DMsaaArray = 15,
};
|
||||||
|
|
||||||
|
// Returns the enumerator name of `type` as text, or "Unknown" for any value that does not
// correspond to a named ImageType enumerator.
constexpr std::string_view NameOf(ImageType type) {
    std::string_view name = "Unknown";
    switch (type) {
    case ImageType::Buffer:
        name = "Buffer";
        break;
    case ImageType::Color1D:
        name = "Color1D";
        break;
    case ImageType::Color2D:
        name = "Color2D";
        break;
    case ImageType::Color3D:
        name = "Color3D";
        break;
    case ImageType::Cube:
        name = "Cube";
        break;
    case ImageType::Color1DArray:
        name = "Color1DArray";
        break;
    case ImageType::Color2DArray:
        name = "Color2DArray";
        break;
    case ImageType::Color2DMsaa:
        name = "Color2DMsaa";
        break;
    case ImageType::Color2DMsaaArray:
        name = "Color2DMsaaArray";
        break;
    default:
        break;
    }
    return name;
}
|
||||||
|
|
||||||
|
struct Image {
|
||||||
|
union {
|
||||||
|
BitField<0, 40, u64> base_address;
|
||||||
|
BitField<40, 12, u64> min_lod;
|
||||||
|
BitField<52, 6, u64> data_format;
|
||||||
|
BitField<58, 4, u64> num_format;
|
||||||
|
BitField<62, 2, u64> mtype;
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
BitField<0, 14, u64> width;
|
||||||
|
BitField<14, 14, u64> height;
|
||||||
|
BitField<28, 3, u64> perf_modulation;
|
||||||
|
BitField<31, 1, u64> interlaced;
|
||||||
|
BitField<32, 3, u64> dst_sel_x;
|
||||||
|
BitField<35, 3, u64> dst_sel_y;
|
||||||
|
BitField<38, 3, u64> dst_sel_z;
|
||||||
|
BitField<41, 3, u64> dst_sel_w;
|
||||||
|
BitField<44, 4, u64> base_level;
|
||||||
|
BitField<48, 4, u64> last_level;
|
||||||
|
BitField<52, 5, u64> tiling_index;
|
||||||
|
BitField<57, 1, u64> pow2pad;
|
||||||
|
BitField<58, 1, u64> mtype2;
|
||||||
|
BitField<59, 1, u64> atc;
|
||||||
|
BitField<60, 4, ImageType> type;
|
||||||
|
};
|
||||||
|
|
||||||
|
VAddr Address() const {
|
||||||
|
return base_address << 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataFormat GetDataFmt() const noexcept {
|
||||||
|
return static_cast<DataFormat>(data_format.Value());
|
||||||
|
}
|
||||||
|
|
||||||
|
NumberFormat GetNumberFmt() const noexcept {
|
||||||
|
return static_cast<NumberFormat>(num_format.Value());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// 8.2.7. Image Sampler [RDNA 2 Instruction Set Architecture]
|
||||||
|
// Raw 3-bit encodings stored in the sampler's clamp_x / clamp_y / clamp_z fields.
enum class ClampMode : u64 {
    Wrap = 0,
    Mirror = 1,
    // "LastTexel" pair.
    ClampLastTexel = 2,
    MirrorOnceLastTexel = 3,
    // "HalfBorder" pair.
    ClampHalfBorder = 4,
    MirrorOnceHalfBorder = 5,
    // "Border" pair.
    ClampBorder = 6,
    MirrorOnceBorder = 7,
};
|
||||||
|
|
||||||
|
// Encodings of the sampler's max_aniso field. The numeric values are the ones the
// compiler assigns to consecutive enumerators starting at zero, spelled out explicitly.
enum class AnisoRatio : u64 {
    One = 0,
    Two = 1,
    Four = 2,
    Eight = 3,
    Sixteen = 4,
};
|
||||||
|
|
||||||
|
// Raw 3-bit encodings stored in the sampler's depth_compare_func field.
// All eight possible values are named.
enum class DepthCompare : u64 {
    Never = 0,
    Less = 1,
    Equal = 2,
    LessEqual = 3,
    Greater = 4,
    NotEqual = 5,
    GreaterEqual = 6,
    Always = 7,
};
|
||||||
|
|
||||||
|
// Raw encodings stored in the sampler's 2-bit filter_mode field; value 3 is not named.
enum class FilterMode : u64 {
    Blend = 0,
    Min = 1,
    Max = 2,
};
|
||||||
|
|
||||||
|
// Raw 2-bit encodings stored in the sampler's xy_mag_filter / xy_min_filter fields.
enum class Filter : u64 {
    Point = 0,
    Bilinear = 1,
    AnisoPoint = 2,
    AnisoLinear = 3,
};
|
||||||
|
|
||||||
|
// Raw encodings stored in the sampler's 2-bit mip_filter field; value 3 is not named.
enum class MipFilter : u64 {
    None = 0,
    Point = 1,
    Linear = 2,
};
|
||||||
|
|
||||||
|
// Raw 2-bit encodings stored in the sampler's border_color_type field.
enum class BorderColor : u64 {
    OpaqueBlack = 0,
    TransparentBlack = 1,
    White = 2,
    Custom = 3,
};
|
||||||
|
|
||||||
|
// Table 8.12 Sampler Resource Definition
|
||||||
|
struct Sampler {
|
||||||
|
union {
|
||||||
|
BitField<0, 3, ClampMode> clamp_x;
|
||||||
|
BitField<3, 3, ClampMode> clamp_y;
|
||||||
|
BitField<6, 3, ClampMode> clamp_z;
|
||||||
|
BitField<9, 3, AnisoRatio> max_aniso;
|
||||||
|
BitField<12, 3, DepthCompare> depth_compare_func;
|
||||||
|
BitField<15, 1, u64> force_unnormalized;
|
||||||
|
BitField<16, 3, u64> aniso_threshold;
|
||||||
|
BitField<19, 1, u64> mc_coord_trunc;
|
||||||
|
BitField<20, 1, u64> force_degamma;
|
||||||
|
BitField<21, 6, u64> aniso_bias;
|
||||||
|
BitField<27, 1, u64> trunc_coord;
|
||||||
|
BitField<28, 1, u64> disable_cube_wrap;
|
||||||
|
BitField<29, 2, FilterMode> filter_mode;
|
||||||
|
BitField<32, 12, u64> min_lod;
|
||||||
|
BitField<44, 12, u64> max_lod;
|
||||||
|
BitField<56, 4, u64> perf_mip;
|
||||||
|
BitField<60, 4, u64> perf_z;
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
BitField<0, 14, u64> lod_bias;
|
||||||
|
BitField<14, 6, u64> lod_bias_sec;
|
||||||
|
BitField<20, 2, Filter> xy_mag_filter;
|
||||||
|
BitField<22, 2, Filter> xy_min_filter;
|
||||||
|
BitField<24, 2, u64> z_filter;
|
||||||
|
BitField<26, 2, MipFilter> mip_filter;
|
||||||
|
BitField<28, 1, u64> mip_point_preclamp;
|
||||||
|
BitField<29, 1, u64> disable_lsb_ceil;
|
||||||
|
BitField<30, 2, u64> unused0;
|
||||||
|
BitField<32, 12, u64> border_color_ptr;
|
||||||
|
BitField<42, 18, u64> unused1;
|
||||||
|
BitField<62, 2, BorderColor> border_color_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
float LodBias() const noexcept {
|
||||||
|
return static_cast<float>(lod_bias);
|
||||||
|
}
|
||||||
|
|
||||||
|
float MinLod() const noexcept {
|
||||||
|
return static_cast<float>(min_lod);
|
||||||
|
}
|
||||||
|
|
||||||
|
float MaxLod() const noexcept {
|
||||||
|
return static_cast<float>(max_lod);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<AmdGpu::ImageType> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
|
auto format(AmdGpu::ImageType type, format_context& ctx) const {
|
||||||
|
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
namespace Vulkan::LiverpoolToVK {
|
namespace Vulkan::LiverpoolToVK {
|
||||||
|
|
||||||
|
using DepthBuffer = Liverpool::DepthBuffer;
|
||||||
|
|
||||||
vk::StencilOp StencilOp(Liverpool::StencilFunc op) {
|
vk::StencilOp StencilOp(Liverpool::StencilFunc op) {
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case Liverpool::StencilFunc::Keep:
|
case Liverpool::StencilFunc::Keep:
|
||||||
|
@ -77,6 +79,8 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
|
||||||
case Liverpool::PrimitiveType::QuadList:
|
case Liverpool::PrimitiveType::QuadList:
|
||||||
// Needs to generate index buffer on the fly.
|
// Needs to generate index buffer on the fly.
|
||||||
return vk::PrimitiveTopology::eTriangleList;
|
return vk::PrimitiveTopology::eTriangleList;
|
||||||
|
case Liverpool::PrimitiveType::RectList:
|
||||||
|
return vk::PrimitiveTopology::eTriangleStrip;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return vk::PrimitiveTopology::eTriangleList;
|
return vk::PrimitiveTopology::eTriangleList;
|
||||||
|
@ -113,6 +117,161 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maps a Liverpool blend factor encoding to the matching vk::BlendFactor.
// Any value without a named enumerator reaches UNREACHABLE().
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
    using Factor = Liverpool::BlendControl::BlendFactor;
    switch (factor) {
    // Fixed factors.
    case Factor::Zero:
        return vk::BlendFactor::eZero;
    case Factor::One:
        return vk::BlendFactor::eOne;
    // Source colour / alpha.
    case Factor::SrcColor:
        return vk::BlendFactor::eSrcColor;
    case Factor::OneMinusSrcColor:
        return vk::BlendFactor::eOneMinusSrcColor;
    case Factor::SrcAlpha:
        return vk::BlendFactor::eSrcAlpha;
    case Factor::OneMinusSrcAlpha:
        return vk::BlendFactor::eOneMinusSrcAlpha;
    // Destination alpha / colour.
    case Factor::DstAlpha:
        return vk::BlendFactor::eDstAlpha;
    case Factor::OneMinusDstAlpha:
        return vk::BlendFactor::eOneMinusDstAlpha;
    case Factor::DstColor:
        return vk::BlendFactor::eDstColor;
    case Factor::OneMinusDstColor:
        return vk::BlendFactor::eOneMinusDstColor;
    case Factor::SrcAlphaSaturate:
        return vk::BlendFactor::eSrcAlphaSaturate;
    // Constant colour.
    case Factor::ConstantColor:
        return vk::BlendFactor::eConstantColor;
    case Factor::OneMinusConstantColor:
        return vk::BlendFactor::eOneMinusConstantColor;
    // Second source.
    case Factor::Src1Color:
        return vk::BlendFactor::eSrc1Color;
    case Factor::InvSrc1Color:
        return vk::BlendFactor::eOneMinusSrc1Color;
    case Factor::Src1Alpha:
        return vk::BlendFactor::eSrc1Alpha;
    case Factor::InvSrc1Alpha:
        return vk::BlendFactor::eOneMinusSrc1Alpha;
    // Constant alpha.
    case Factor::ConstantAlpha:
        return vk::BlendFactor::eConstantAlpha;
    case Factor::OneMinusConstantAlpha:
        return vk::BlendFactor::eOneMinusConstantAlpha;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
|
// Maps a Liverpool blend function encoding to the matching vk::BlendOp.
// Any value without a named enumerator reaches UNREACHABLE().
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
    using Func = Liverpool::BlendControl::BlendFunc;
    switch (func) {
    case Func::Add:
        return vk::BlendOp::eAdd;
    case Func::Subtract:
        return vk::BlendOp::eSubtract;
    case Func::Min:
        return vk::BlendOp::eMin;
    case Func::Max:
        return vk::BlendOp::eMax;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
|
// https://github.com/chaotic-cx/mesa-mirror/blob/0954afff5/src/amd/vulkan/radv_sampler.c#L21
|
||||||
|
// Maps a sampler clamp mode to a vk::SamplerAddressMode. Only five modes are handled;
// the HalfBorder modes and MirrorOnceBorder fall into the default and hit UNREACHABLE().
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode) {
    using Mode = AmdGpu::ClampMode;
    switch (mode) {
    case Mode::Wrap:
        return vk::SamplerAddressMode::eRepeat;
    case Mode::Mirror:
        return vk::SamplerAddressMode::eMirroredRepeat;
    case Mode::ClampLastTexel:
        return vk::SamplerAddressMode::eClampToEdge;
    case Mode::MirrorOnceLastTexel:
        return vk::SamplerAddressMode::eMirrorClampToEdge;
    case Mode::ClampBorder:
        return vk::SamplerAddressMode::eClampToBorder;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
|
// Maps a sampler depth-compare function to the matching vk::CompareOp.
// Every named enumerator is handled. The default branch mirrors the other converters in
// this file so that an out-of-range value hits UNREACHABLE() rather than running off the
// end of a value-returning function.
vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp) {
    switch (comp) {
    case AmdGpu::DepthCompare::Never:
        return vk::CompareOp::eNever;
    case AmdGpu::DepthCompare::Less:
        return vk::CompareOp::eLess;
    case AmdGpu::DepthCompare::Equal:
        return vk::CompareOp::eEqual;
    case AmdGpu::DepthCompare::LessEqual:
        return vk::CompareOp::eLessOrEqual;
    case AmdGpu::DepthCompare::Greater:
        return vk::CompareOp::eGreater;
    case AmdGpu::DepthCompare::NotEqual:
        return vk::CompareOp::eNotEqual;
    case AmdGpu::DepthCompare::GreaterEqual:
        return vk::CompareOp::eGreaterOrEqual;
    case AmdGpu::DepthCompare::Always:
        return vk::CompareOp::eAlways;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
|
// Maps a sampler XY filter to a vk::Filter. The Aniso* variants collapse onto the same
// result as their non-aniso counterparts; anything else hits UNREACHABLE().
vk::Filter Filter(AmdGpu::Filter filter) {
    switch (filter) {
    // Linear family.
    case AmdGpu::Filter::Bilinear:
    case AmdGpu::Filter::AnisoLinear:
        return vk::Filter::eLinear;
    // Point family.
    case AmdGpu::Filter::Point:
    case AmdGpu::Filter::AnisoPoint:
        return vk::Filter::eNearest;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
|
// Maps a sampler filter mode to a vk::SamplerReductionMode.
// Any value without a named enumerator reaches UNREACHABLE().
vk::SamplerReductionMode FilterMode(AmdGpu::FilterMode mode) {
    using Mode = AmdGpu::FilterMode;
    switch (mode) {
    case Mode::Min:
        return vk::SamplerReductionMode::eMin;
    case Mode::Max:
        return vk::SamplerReductionMode::eMax;
    case Mode::Blend:
        return vk::SamplerReductionMode::eWeightedAverage;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
|
// Maps a sampler mip filter to a vk::SamplerMipmapMode. None and Point both yield
// eNearest; any value without a named enumerator reaches UNREACHABLE().
vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter) {
    switch (filter) {
    case AmdGpu::MipFilter::Linear:
        return vk::SamplerMipmapMode::eLinear;
    case AmdGpu::MipFilter::None:
    case AmdGpu::MipFilter::Point:
        return vk::SamplerMipmapMode::eNearest;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
|
// Maps a sampler border colour type to a float-flavoured vk::BorderColor.
// Any value without a named enumerator reaches UNREACHABLE().
vk::BorderColor BorderColor(AmdGpu::BorderColor color) {
    using Color = AmdGpu::BorderColor;
    switch (color) {
    case Color::OpaqueBlack:
        return vk::BorderColor::eFloatOpaqueBlack;
    case Color::TransparentBlack:
        return vk::BorderColor::eFloatTransparentBlack;
    case Color::White:
        return vk::BorderColor::eFloatOpaqueWhite;
    case Color::Custom:
        return vk::BorderColor::eFloatCustomEXT;
    default:
        UNREACHABLE();
    }
}
|
||||||
|
|
||||||
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
||||||
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
|
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
|
||||||
num_format == AmdGpu::NumberFormat::Float) {
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
|
@ -130,11 +289,22 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
||||||
num_format == AmdGpu::NumberFormat::Srgb) {
|
num_format == AmdGpu::NumberFormat::Srgb) {
|
||||||
return vk::Format::eR8G8B8A8Srgb;
|
return vk::Format::eR8G8B8A8Srgb;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
|
return vk::Format::eR32G32B32Sfloat;
|
||||||
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format32_32 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Float) {
|
||||||
|
return vk::Format::eR32G32Sfloat;
|
||||||
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
|
vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format) {
|
||||||
Liverpool::DepthBuffer::StencilFormat stencil_format) {
|
if (z_format == DepthBuffer::ZFormat::Z32Float &&
|
||||||
|
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
|
||||||
|
return vk::Format::eD32SfloatS8Uint;
|
||||||
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/amdgpu/pixel_format.h"
|
#include "video_core/amdgpu/pixel_format.h"
|
||||||
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
|
|
||||||
namespace Vulkan::LiverpoolToVK {
|
namespace Vulkan::LiverpoolToVK {
|
||||||
|
@ -21,6 +22,22 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);
|
||||||
|
|
||||||
vk::CullModeFlags CullMode(Liverpool::CullMode mode);
|
vk::CullModeFlags CullMode(Liverpool::CullMode mode);
|
||||||
|
|
||||||
|
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
|
||||||
|
|
||||||
|
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
|
||||||
|
|
||||||
|
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
|
||||||
|
|
||||||
|
vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp);
|
||||||
|
|
||||||
|
vk::Filter Filter(AmdGpu::Filter filter);
|
||||||
|
|
||||||
|
vk::SamplerReductionMode FilterMode(AmdGpu::FilterMode mode);
|
||||||
|
|
||||||
|
vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter);
|
||||||
|
|
||||||
|
vk::BorderColor BorderColor(AmdGpu::BorderColor color);
|
||||||
|
|
||||||
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
|
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
|
||||||
|
|
||||||
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
|
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
|
||||||
|
|
|
@ -10,6 +10,8 @@
|
||||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
@ -25,8 +27,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
}
|
}
|
||||||
stages[i] = *infos[i];
|
stages[i] = *infos[i];
|
||||||
}
|
}
|
||||||
|
BuildDescSetLayout();
|
||||||
desc_layout = BuildSetLayout();
|
|
||||||
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
const vk::DescriptorSetLayout set_layout = *desc_layout;
|
||||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||||
.setLayoutCount = 1U,
|
.setLayoutCount = 1U,
|
||||||
|
@ -61,6 +62,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
.pVertexAttributeDescriptions = attributes.data(),
|
.pVertexAttributeDescriptions = attributes.data(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
ASSERT_MSG(key.prim_type != Liverpool::PrimitiveType::RectList || IsEmbeddedVs(),
|
||||||
|
"Rectangle List primitive type is only supported for embedded VS");
|
||||||
|
|
||||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||||
.topology = LiverpoolToVK::PrimitiveType(key.prim_type),
|
.topology = LiverpoolToVK::PrimitiveType(key.prim_type),
|
||||||
.primitiveRestartEnable = false,
|
.primitiveRestartEnable = false,
|
||||||
|
@ -81,20 +85,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
.sampleShadingEnable = false,
|
.sampleShadingEnable = false,
|
||||||
};
|
};
|
||||||
|
|
||||||
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
|
|
||||||
.blendEnable = false,
|
|
||||||
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
|
|
||||||
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
|
|
||||||
};
|
|
||||||
|
|
||||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
|
||||||
.logicOpEnable = false,
|
|
||||||
.logicOp = vk::LogicOp::eCopy,
|
|
||||||
.attachmentCount = 1,
|
|
||||||
.pAttachments = &colorblend_attachment,
|
|
||||||
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
|
|
||||||
};
|
|
||||||
|
|
||||||
const vk::Viewport viewport = {
|
const vk::Viewport viewport = {
|
||||||
.x = 0.0f,
|
.x = 0.0f,
|
||||||
.y = 0.0f,
|
.y = 0.0f,
|
||||||
|
@ -119,6 +109,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
boost::container::static_vector<vk::DynamicState, 14> dynamic_states = {
|
boost::container::static_vector<vk::DynamicState, 14> dynamic_states = {
|
||||||
vk::DynamicState::eViewport,
|
vk::DynamicState::eViewport,
|
||||||
vk::DynamicState::eScissor,
|
vk::DynamicState::eScissor,
|
||||||
|
vk::DynamicState::eBlendConstants,
|
||||||
};
|
};
|
||||||
|
|
||||||
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
|
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
|
||||||
|
@ -174,6 +165,30 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
.stencilAttachmentFormat = vk::Format::eUndefined,
|
.stencilAttachmentFormat = vk::Format::eUndefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
|
||||||
|
for (u32 i = 0; i < num_color_formats; i++) {
|
||||||
|
const auto& control = key.blend_controls[i];
|
||||||
|
attachments[i] = vk::PipelineColorBlendAttachmentState{
|
||||||
|
.blendEnable = key.blend_controls[i].enable,
|
||||||
|
.srcColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_src_factor),
|
||||||
|
.dstColorBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor),
|
||||||
|
.colorBlendOp = LiverpoolToVK::BlendOp(control.color_func),
|
||||||
|
.srcAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.alpha_src_factor),
|
||||||
|
.dstAlphaBlendFactor = LiverpoolToVK::BlendFactor(control.color_dst_factor),
|
||||||
|
.alphaBlendOp = LiverpoolToVK::BlendOp(control.alpha_func),
|
||||||
|
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
|
||||||
|
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||||
|
.logicOpEnable = false,
|
||||||
|
.logicOp = vk::LogicOp::eCopy,
|
||||||
|
.attachmentCount = num_color_formats,
|
||||||
|
.pAttachments = attachments.data(),
|
||||||
|
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
|
||||||
|
};
|
||||||
|
|
||||||
const vk::GraphicsPipelineCreateInfo pipeline_info = {
|
const vk::GraphicsPipelineCreateInfo pipeline_info = {
|
||||||
.pNext = &pipeline_rendering_ci,
|
.pNext = &pipeline_rendering_ci,
|
||||||
.stageCount = shader_count,
|
.stageCount = shader_count,
|
||||||
|
@ -199,14 +214,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||||
|
|
||||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||||
|
|
||||||
vk::UniqueDescriptorSetLayout GraphicsPipeline::BuildSetLayout() const {
|
void GraphicsPipeline::BuildDescSetLayout() {
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||||
for (const auto& stage : stages) {
|
for (const auto& stage : stages) {
|
||||||
for (const auto& buffer : stage.buffers) {
|
for (const auto& buffer : stage.buffers) {
|
||||||
bindings.push_back({
|
bindings.push_back({
|
||||||
.binding = binding++,
|
.binding = binding++,
|
||||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
|
||||||
|
: vk::DescriptorType::eUniformBuffer,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
for (const auto& image : stage.images) {
|
||||||
|
bindings.push_back({
|
||||||
|
.binding = binding++,
|
||||||
|
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
for (const auto& sampler : stage.samplers) {
|
||||||
|
bindings.push_back({
|
||||||
|
.binding = binding++,
|
||||||
|
.descriptorType = vk::DescriptorType::eSampler,
|
||||||
.descriptorCount = 1,
|
.descriptorCount = 1,
|
||||||
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
|
||||||
});
|
});
|
||||||
|
@ -217,12 +249,24 @@ vk::UniqueDescriptorSetLayout GraphicsPipeline::BuildSetLayout() const {
|
||||||
.bindingCount = static_cast<u32>(bindings.size()),
|
.bindingCount = static_cast<u32>(bindings.size()),
|
||||||
.pBindings = bindings.data(),
|
.pBindings = bindings.data(),
|
||||||
};
|
};
|
||||||
return instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
desc_layout = instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
|
void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||||
|
VideoCore::TextureCache& texture_cache) const {
|
||||||
|
static constexpr u64 MinUniformAlignment = 64;
|
||||||
|
|
||||||
|
const auto map_staging = [&](auto src, size_t size) {
|
||||||
|
const auto [data, offset, _] = staging.Map(size, MinUniformAlignment);
|
||||||
|
std::memcpy(data, reinterpret_cast<const void*>(src), size);
|
||||||
|
staging.Commit(size);
|
||||||
|
return offset;
|
||||||
|
};
|
||||||
|
|
||||||
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
|
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
|
||||||
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
|
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
|
||||||
|
VAddr base_address = 0;
|
||||||
|
u32 start_offset = 0;
|
||||||
|
|
||||||
// Bind vertex buffer.
|
// Bind vertex buffer.
|
||||||
const auto& vs_info = stages[0];
|
const auto& vs_info = stages[0];
|
||||||
|
@ -230,38 +274,77 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
|
||||||
for (u32 i = 0; i < num_buffers; ++i) {
|
for (u32 i = 0; i < num_buffers; ++i) {
|
||||||
const auto& input = vs_info.vs_inputs[i];
|
const auto& input = vs_info.vs_inputs[i];
|
||||||
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||||
std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
|
if (i == 0) {
|
||||||
|
start_offset =
|
||||||
|
map_staging(buffer.base_address.Value(), buffer.stride * buffer.num_records);
|
||||||
|
base_address = buffer.base_address;
|
||||||
|
}
|
||||||
|
buffers[i] = staging.Handle();
|
||||||
|
offsets[i] = start_offset + buffer.base_address - base_address;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
|
if (num_buffers > 0) {
|
||||||
|
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
|
||||||
|
}
|
||||||
|
|
||||||
// Bind resource buffers and textures.
|
// Bind resource buffers and textures.
|
||||||
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
|
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
|
||||||
|
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
|
||||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
|
|
||||||
for (const auto& stage : stages) {
|
for (const auto& stage : stages) {
|
||||||
for (const auto& buffer : stage.buffers) {
|
for (const auto& buffer : stage.buffers) {
|
||||||
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
|
||||||
const auto [vk_buffer, offset] = memory->GetVulkanBuffer(vsharp.base_address);
|
const u32 size = vsharp.stride * vsharp.num_records;
|
||||||
buffer_infos.push_back({
|
const u32 offset = map_staging(vsharp.base_address.Value(), size);
|
||||||
.buffer = vk_buffer,
|
buffer_infos.emplace_back(staging.Handle(), offset, size);
|
||||||
.offset = offset,
|
|
||||||
.range = vsharp.stride * vsharp.num_records,
|
|
||||||
});
|
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
.dstSet = VK_NULL_HANDLE,
|
.dstSet = VK_NULL_HANDLE,
|
||||||
.dstBinding = binding,
|
.dstBinding = binding++,
|
||||||
.dstArrayElement = 0,
|
.dstArrayElement = 0,
|
||||||
.descriptorCount = 1,
|
.descriptorCount = 1,
|
||||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
|
||||||
|
: vk::DescriptorType::eUniformBuffer,
|
||||||
.pBufferInfo = &buffer_infos.back(),
|
.pBufferInfo = &buffer_infos.back(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const auto& image : stage.images) {
|
||||||
|
const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
|
||||||
|
const auto& image_view = texture_cache.FindImageView(tsharp);
|
||||||
|
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
|
||||||
|
vk::ImageLayout::eGeneral);
|
||||||
|
set_writes.push_back({
|
||||||
|
.dstSet = VK_NULL_HANDLE,
|
||||||
|
.dstBinding = binding++,
|
||||||
|
.dstArrayElement = 0,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||||
|
.pImageInfo = &image_infos.back(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
for (const auto& sampler : stage.samplers) {
|
||||||
|
const auto ssharp =
|
||||||
|
stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
||||||
|
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||||
|
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||||
|
set_writes.push_back({
|
||||||
|
.dstSet = VK_NULL_HANDLE,
|
||||||
|
.dstBinding = binding++,
|
||||||
|
.dstArrayElement = 0,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = vk::DescriptorType::eSampler,
|
||||||
|
.pImageInfo = &image_infos.back(),
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, set_writes);
|
if (!set_writes.empty()) {
|
||||||
|
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
|
||||||
|
set_writes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -11,6 +11,10 @@ namespace Core {
|
||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
class TextureCache;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
static constexpr u32 MaxVertexBufferCount = 32;
|
static constexpr u32 MaxVertexBufferCount = 32;
|
||||||
|
@ -18,6 +22,7 @@ static constexpr u32 MaxShaderStages = 5;
|
||||||
|
|
||||||
class Instance;
|
class Instance;
|
||||||
class Scheduler;
|
class Scheduler;
|
||||||
|
class StreamBuffer;
|
||||||
|
|
||||||
using Liverpool = AmdGpu::Liverpool;
|
using Liverpool = AmdGpu::Liverpool;
|
||||||
|
|
||||||
|
@ -33,6 +38,7 @@ struct PipelineKey {
|
||||||
Liverpool::PrimitiveType prim_type;
|
Liverpool::PrimitiveType prim_type;
|
||||||
Liverpool::PolygonMode polygon_mode;
|
Liverpool::PolygonMode polygon_mode;
|
||||||
Liverpool::CullMode cull_mode;
|
Liverpool::CullMode cull_mode;
|
||||||
|
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
||||||
|
|
||||||
bool operator==(const PipelineKey& key) const noexcept {
|
bool operator==(const PipelineKey& key) const noexcept {
|
||||||
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
|
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
|
||||||
|
@ -48,14 +54,20 @@ public:
|
||||||
std::array<vk::ShaderModule, MaxShaderStages> modules);
|
std::array<vk::ShaderModule, MaxShaderStages> modules);
|
||||||
~GraphicsPipeline();
|
~GraphicsPipeline();
|
||||||
|
|
||||||
void BindResources(Core::MemoryManager* memory) const;
|
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
|
||||||
|
VideoCore::TextureCache& texture_cache) const;
|
||||||
|
|
||||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
||||||
return *pipeline;
|
return *pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool IsEmbeddedVs() const noexcept {
|
||||||
|
static constexpr size_t EmbeddedVsHash = 0x59c556606a027efd;
|
||||||
|
return key.stage_hashes[0] == EmbeddedVsHash;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
vk::UniqueDescriptorSetLayout BuildSetLayout() const;
|
void BuildDescSetLayout();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Instance& instance;
|
const Instance& instance;
|
||||||
|
|
|
@ -151,6 +151,7 @@ bool Instance::CreateDevice() {
|
||||||
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||||
index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
||||||
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||||
|
add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
|
||||||
|
|
||||||
const auto family_properties = physical_device.getQueueFamilyProperties();
|
const auto family_properties = physical_device.getQueueFamilyProperties();
|
||||||
if (family_properties.empty()) {
|
if (family_properties.empty()) {
|
||||||
|
@ -213,6 +214,9 @@ bool Instance::CreateDevice() {
|
||||||
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{
|
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{
|
||||||
.indexTypeUint8 = true,
|
.indexTypeUint8 = true,
|
||||||
},
|
},
|
||||||
|
vk::PhysicalDeviceMaintenance4Features{
|
||||||
|
.maintenance4 = true,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!index_type_uint8) {
|
if (!index_type_uint8) {
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <fstream>
|
#include <xxhash.h>
|
||||||
|
#include "common/config.h"
|
||||||
|
#include "common/io_file.h"
|
||||||
|
#include "common/path_util.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||||
#include "shader_recompiler/recompiler.h"
|
#include "shader_recompiler/recompiler.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
@ -41,6 +43,9 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
|
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
|
||||||
block_pool{512} {
|
block_pool{512} {
|
||||||
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
||||||
|
profile = Shader::Profile{
|
||||||
|
.supported_spirv = 0x00010600U,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const GraphicsPipeline* PipelineCache::GetPipeline() {
|
const GraphicsPipeline* PipelineCache::GetPipeline() {
|
||||||
|
@ -63,6 +68,7 @@ void PipelineCache::RefreshKey() {
|
||||||
key.stencil_ref_back = regs.stencil_ref_back;
|
key.stencil_ref_back = regs.stencil_ref_back;
|
||||||
key.prim_type = regs.primitive_type;
|
key.prim_type = regs.primitive_type;
|
||||||
key.polygon_mode = regs.polygon_control.PolyMode();
|
key.polygon_mode = regs.polygon_control.PolyMode();
|
||||||
|
key.blend_controls = regs.blend_control;
|
||||||
|
|
||||||
const auto& db = regs.depth_buffer;
|
const auto& db = regs.depth_buffer;
|
||||||
key.depth_format = key.depth.depth_enable
|
key.depth_format = key.depth.depth_enable
|
||||||
|
@ -81,17 +87,15 @@ void PipelineCache::RefreshKey() {
|
||||||
key.stage_hashes[i] = 0;
|
key.stage_hashes[i] = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const u32* code = pgm->Address<u32>();
|
const auto code = pgm->Code();
|
||||||
|
key.stage_hashes[i] = XXH3_64bits(code.data(), code.size_bytes());
|
||||||
Shader::BinaryInfo bininfo;
|
|
||||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
|
||||||
key.stage_hashes[i] = bininfo.shader_hash;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
|
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
|
|
||||||
|
u32 binding{};
|
||||||
std::array<Shader::IR::Program, MaxShaderStages> programs;
|
std::array<Shader::IR::Program, MaxShaderStages> programs;
|
||||||
std::array<const Shader::Info*, MaxShaderStages> infos{};
|
std::array<const Shader::Info*, MaxShaderStages> infos{};
|
||||||
|
|
||||||
|
@ -101,40 +105,52 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto* pgm = regs.ProgramForStage(i);
|
auto* pgm = regs.ProgramForStage(i);
|
||||||
const u32* code = pgm->Address<u32>();
|
const auto code = pgm->Code();
|
||||||
|
|
||||||
Shader::BinaryInfo bininfo;
|
const auto it = module_map.find(graphics_key.stage_hashes[i]);
|
||||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
|
||||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
|
||||||
|
|
||||||
const auto it = module_map.find(bininfo.shader_hash);
|
|
||||||
if (it != module_map.end()) {
|
if (it != module_map.end()) {
|
||||||
stages[i] = *it->second;
|
stages[i] = *it->second;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dump shader code if requested.
|
||||||
|
const auto stage = Shader::Stage{i};
|
||||||
|
const u64 hash = graphics_key.stage_hashes[i];
|
||||||
|
if (Config::dumpShaders()) {
|
||||||
|
DumpShader(code, hash, stage, "bin");
|
||||||
|
}
|
||||||
|
|
||||||
block_pool.ReleaseContents();
|
block_pool.ReleaseContents();
|
||||||
inst_pool.ReleaseContents();
|
inst_pool.ReleaseContents();
|
||||||
|
|
||||||
// Recompile shader to IR.
|
// Recompile shader to IR.
|
||||||
const auto stage = Shader::Stage{i};
|
|
||||||
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
|
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||||
std::move(info));
|
|
||||||
|
|
||||||
// Compile IR to SPIR-V
|
// Compile IR to SPIR-V
|
||||||
const auto profile = Shader::Profile{.supported_spirv = 0x00010600U};
|
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
|
||||||
const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i]);
|
|
||||||
std::ofstream file("shader0.spv", std::ios::out | std::ios::binary);
|
|
||||||
file.write((const char*)spv_code.data(), spv_code.size() * 4);
|
|
||||||
file.close();
|
|
||||||
|
|
||||||
stages[i] = CompileSPV(spv_code, instance.GetDevice());
|
stages[i] = CompileSPV(spv_code, instance.GetDevice());
|
||||||
infos[i] = &programs[i].info;
|
infos[i] = &programs[i].info;
|
||||||
|
|
||||||
|
if (Config::dumpShaders()) {
|
||||||
|
DumpShader(spv_code, hash, stage, "spv");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
|
return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
|
||||||
infos, stages);
|
infos, stages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||||
|
std::string_view ext) {
|
||||||
|
using namespace Common::FS;
|
||||||
|
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
|
||||||
|
if (!std::filesystem::exists(dump_dir)) {
|
||||||
|
std::filesystem::create_directories(dump_dir);
|
||||||
|
}
|
||||||
|
const auto filename = fmt::format("{}_{:#X}.{}", stage, hash, ext);
|
||||||
|
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||||
|
file.WriteSpan(code);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <tsl/robin_map.h>
|
#include <tsl/robin_map.h>
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/object_pool.h"
|
#include "shader_recompiler/object_pool.h"
|
||||||
|
#include "shader_recompiler/profile.h"
|
||||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
@ -32,6 +33,8 @@ private:
|
||||||
|
|
||||||
std::unique_ptr<GraphicsPipeline> CreatePipeline();
|
std::unique_ptr<GraphicsPipeline> CreatePipeline();
|
||||||
|
|
||||||
|
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, std::string_view ext);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Instance& instance;
|
const Instance& instance;
|
||||||
Scheduler& scheduler;
|
Scheduler& scheduler;
|
||||||
|
@ -41,6 +44,7 @@ private:
|
||||||
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
|
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
|
||||||
std::array<vk::ShaderModule, MaxShaderStages> stages{};
|
std::array<vk::ShaderModule, MaxShaderStages> stages{};
|
||||||
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||||
|
Shader::Profile profile{};
|
||||||
PipelineKey graphics_key{};
|
PipelineKey graphics_key{};
|
||||||
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
|
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
|
||||||
Shader::ObjectPool<Shader::IR::Block> block_pool;
|
Shader::ObjectPool<Shader::IR::Block> block_pool;
|
||||||
|
|
|
@ -12,16 +12,17 @@
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits::eVertexBuffer |
|
static constexpr vk::BufferUsageFlags VertexIndexFlags =
|
||||||
vk::BufferUsageFlagBits::eIndexBuffer |
|
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
|
||||||
vk::BufferUsageFlagBits::eTransferDst;
|
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eUniformBuffer |
|
||||||
|
vk::BufferUsageFlagBits::eStorageBuffer;
|
||||||
|
|
||||||
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||||
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
|
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||||
pipeline_cache{instance, scheduler, liverpool},
|
pipeline_cache{instance, scheduler, liverpool},
|
||||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
|
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 32_MB} {
|
||||||
if (!Config::nullGpu()) {
|
if (!Config::nullGpu()) {
|
||||||
liverpool->BindRasterizer(this);
|
liverpool->BindRasterizer(this);
|
||||||
}
|
}
|
||||||
|
@ -35,9 +36,10 @@ void Rasterizer::Draw(bool is_indexed) {
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
const u32 num_indices = SetupIndexBuffer(is_indexed);
|
const u32 num_indices = SetupIndexBuffer(is_indexed);
|
||||||
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
|
|
||||||
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
|
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
|
||||||
pipeline->BindResources(memory);
|
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
|
||||||
|
|
||||||
|
const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
|
||||||
|
|
||||||
const vk::RenderingAttachmentInfo color_info = {
|
const vk::RenderingAttachmentInfo color_info = {
|
||||||
.imageView = *image_view.image_view,
|
.imageView = *image_view.image_view,
|
||||||
|
@ -61,7 +63,8 @@ void Rasterizer::Draw(bool is_indexed) {
|
||||||
if (is_indexed) {
|
if (is_indexed) {
|
||||||
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), 0, 0);
|
const u32 num_vertices = pipeline->IsEmbeddedVs() ? 4 : regs.num_indices;
|
||||||
|
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), 0, 0);
|
||||||
}
|
}
|
||||||
cmdbuf.endRendering();
|
cmdbuf.endRendering();
|
||||||
}
|
}
|
||||||
|
@ -88,18 +91,30 @@ u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
|
||||||
return regs.num_indices;
|
return regs.num_indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
const VAddr index_address = regs.index_base_address.Address();
|
// Figure out index type and size.
|
||||||
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
|
const bool is_index16 = regs.index_buffer_type.index_type == Liverpool::IndexType::Index16;
|
||||||
const vk::IndexType index_type =
|
const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32;
|
||||||
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
|
const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32);
|
||||||
: vk::IndexType::eUint32;
|
|
||||||
|
// Upload index data to stream buffer.
|
||||||
|
const auto index_address = regs.index_base_address.Address<const void*>();
|
||||||
|
const u32 index_buffer_size = regs.num_indices * index_size;
|
||||||
|
const auto [data, offset, _] = vertex_index_buffer.Map(index_buffer_size);
|
||||||
|
std::memcpy(data, index_address, index_buffer_size);
|
||||||
|
vertex_index_buffer.Commit(index_buffer_size);
|
||||||
|
|
||||||
|
// Bind index buffer.
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
|
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, index_type);
|
||||||
return regs.num_indices;
|
return regs.num_indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::UpdateDynamicState() {
|
void Rasterizer::UpdateDynamicState() {
|
||||||
UpdateViewportScissorState();
|
UpdateViewportScissorState();
|
||||||
|
|
||||||
|
auto& regs = liverpool->regs;
|
||||||
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
|
cmdbuf.setBlendConstants(&regs.blend_constants.red);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::UpdateViewportScissorState() {
|
void Rasterizer::UpdateViewportScissorState() {
|
||||||
|
|
|
@ -44,6 +44,22 @@ using Libraries::VideoOut::TilingMode;
|
||||||
return usage;
|
return usage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
|
||||||
|
switch (type) {
|
||||||
|
case AmdGpu::ImageType::Color1D:
|
||||||
|
return vk::ImageType::e1D;
|
||||||
|
case AmdGpu::ImageType::Color2D:
|
||||||
|
case AmdGpu::ImageType::Color1DArray:
|
||||||
|
case AmdGpu::ImageType::Cube:
|
||||||
|
return vk::ImageType::e2D;
|
||||||
|
case AmdGpu::ImageType::Color3D:
|
||||||
|
case AmdGpu::ImageType::Color2DArray:
|
||||||
|
return vk::ImageType::e3D;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept {
|
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept {
|
||||||
const auto& attrib = group.attrib;
|
const auto& attrib = group.attrib;
|
||||||
is_tiled = attrib.tiling_mode == TilingMode::Tile;
|
is_tiled = attrib.tiling_mode == TilingMode::Tile;
|
||||||
|
@ -72,10 +88,23 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
|
||||||
type = vk::ImageType::e2D;
|
type = vk::ImageType::e2D;
|
||||||
size.width = buffer.Pitch();
|
size.width = buffer.Pitch();
|
||||||
size.height = buffer.Height();
|
size.height = buffer.Height();
|
||||||
|
size.depth = 1;
|
||||||
pitch = size.width;
|
pitch = size.width;
|
||||||
guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1);
|
guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
||||||
|
is_tiled = false;
|
||||||
|
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||||
|
type = ConvertImageType(image.type);
|
||||||
|
size.width = image.width + 1;
|
||||||
|
size.height = image.height + 1;
|
||||||
|
size.depth = 1;
|
||||||
|
// TODO: Derive this properly from tiling params
|
||||||
|
pitch = size.width;
|
||||||
|
guest_size_bytes = size.width * size.height * 4;
|
||||||
|
}
|
||||||
|
|
||||||
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
|
UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
|
||||||
: device{device_}, allocator{allocator_} {}
|
: device{device_}, allocator{allocator_} {}
|
||||||
|
|
||||||
|
@ -109,7 +138,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
||||||
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
|
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
|
||||||
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
||||||
vk::ImageCreateFlags flags{};
|
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat};
|
||||||
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
||||||
info.size.width == info.size.height) {
|
info.size.width == info.size.height) {
|
||||||
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
|
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "core/libraries/videoout/buffer.h"
|
#include "core/libraries/videoout/buffer.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
#include "video_core/texture_cache/image_view.h"
|
#include "video_core/texture_cache/image_view.h"
|
||||||
#include "video_core/texture_cache/types.h"
|
#include "video_core/texture_cache/types.h"
|
||||||
|
@ -34,6 +35,7 @@ struct ImageInfo {
|
||||||
ImageInfo() = default;
|
ImageInfo() = default;
|
||||||
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
|
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
|
||||||
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept;
|
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept;
|
||||||
|
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
|
||||||
|
|
||||||
bool is_tiled = false;
|
bool is_tiled = false;
|
||||||
vk::Format pixel_format = vk::Format::eUndefined;
|
vk::Format pixel_format = vk::Format::eUndefined;
|
||||||
|
|
|
@ -1,11 +1,62 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/texture_cache/image_view.h"
|
#include "video_core/texture_cache/image_view.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
|
vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) {
|
||||||
|
switch (type) {
|
||||||
|
case AmdGpu::ImageType::Color1D:
|
||||||
|
return vk::ImageViewType::e1D;
|
||||||
|
case AmdGpu::ImageType::Color1DArray:
|
||||||
|
return vk::ImageViewType::e1DArray;
|
||||||
|
case AmdGpu::ImageType::Color2D:
|
||||||
|
case AmdGpu::ImageType::Cube:
|
||||||
|
return vk::ImageViewType::e2D;
|
||||||
|
case AmdGpu::ImageType::Color2DArray:
|
||||||
|
return vk::ImageViewType::e2DArray;
|
||||||
|
case AmdGpu::ImageType::Color3D:
|
||||||
|
return vk::ImageViewType::e3D;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
|
||||||
|
switch (dst_sel) {
|
||||||
|
case 0:
|
||||||
|
return vk::ComponentSwizzle::eZero;
|
||||||
|
case 1:
|
||||||
|
return vk::ComponentSwizzle::eOne;
|
||||||
|
case 4:
|
||||||
|
return vk::ComponentSwizzle::eR;
|
||||||
|
case 5:
|
||||||
|
return vk::ComponentSwizzle::eG;
|
||||||
|
case 6:
|
||||||
|
return vk::ComponentSwizzle::eB;
|
||||||
|
case 7:
|
||||||
|
return vk::ComponentSwizzle::eA;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept {
|
||||||
|
type = ConvertImageViewType(image.type);
|
||||||
|
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||||
|
range.base.level = image.base_level;
|
||||||
|
range.base.layer = 0;
|
||||||
|
range.extent.levels = 1;
|
||||||
|
range.extent.layers = 1;
|
||||||
|
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
|
||||||
|
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
|
||||||
|
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
|
||||||
|
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
||||||
|
}
|
||||||
|
|
||||||
ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||||
const ImageViewInfo& info_, vk::Image image)
|
const ImageViewInfo& info_, vk::Image image)
|
||||||
: info{info_} {
|
: info{info_} {
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
#include "video_core/texture_cache/types.h"
|
#include "video_core/texture_cache/types.h"
|
||||||
|
|
||||||
|
@ -14,6 +15,9 @@ class Scheduler;
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
struct ImageViewInfo {
|
struct ImageViewInfo {
|
||||||
|
explicit ImageViewInfo() = default;
|
||||||
|
explicit ImageViewInfo(const AmdGpu::Image& image) noexcept;
|
||||||
|
|
||||||
vk::ImageViewType type = vk::ImageViewType::e2D;
|
vk::ImageViewType type = vk::ImageViewType::e2D;
|
||||||
vk::Format format = vk::Format::eR8G8B8A8Unorm;
|
vk::Format format = vk::Format::eR8G8B8A8Unorm;
|
||||||
SubresourceRange range;
|
SubresourceRange range;
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
|
#include "video_core/texture_cache/sampler.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler) {
|
||||||
|
using namespace Vulkan;
|
||||||
|
const vk::SamplerCreateInfo sampler_ci = {
|
||||||
|
.magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter),
|
||||||
|
.minFilter = LiverpoolToVK::Filter(sampler.xy_min_filter),
|
||||||
|
.mipmapMode = LiverpoolToVK::MipFilter(sampler.mip_filter),
|
||||||
|
.addressModeU = LiverpoolToVK::ClampMode(sampler.clamp_x),
|
||||||
|
.addressModeV = LiverpoolToVK::ClampMode(sampler.clamp_y),
|
||||||
|
.addressModeW = LiverpoolToVK::ClampMode(sampler.clamp_z),
|
||||||
|
.mipLodBias = sampler.LodBias(),
|
||||||
|
.compareEnable = sampler.depth_compare_func != AmdGpu::DepthCompare::Never,
|
||||||
|
.compareOp = LiverpoolToVK::DepthCompare(sampler.depth_compare_func),
|
||||||
|
.minLod = sampler.MinLod(),
|
||||||
|
.maxLod = sampler.MaxLod(),
|
||||||
|
.borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type),
|
||||||
|
.unnormalizedCoordinates = bool(sampler.force_unnormalized),
|
||||||
|
};
|
||||||
|
handle = instance.GetDevice().createSamplerUnique(sampler_ci);
|
||||||
|
}
|
||||||
|
|
||||||
|
Sampler::~Sampler() = default;
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -0,0 +1,34 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "video_core/amdgpu/resource.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
class Instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
class Sampler {
|
||||||
|
public:
|
||||||
|
explicit Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler);
|
||||||
|
~Sampler();
|
||||||
|
|
||||||
|
Sampler(const Sampler&) = delete;
|
||||||
|
Sampler& operator=(const Sampler&) = delete;
|
||||||
|
|
||||||
|
Sampler(Sampler&&) = default;
|
||||||
|
Sampler& operator=(Sampler&&) = default;
|
||||||
|
|
||||||
|
vk::Sampler Handle() const noexcept {
|
||||||
|
return *handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
vk::UniqueSampler handle;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -1,10 +1,9 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <limits>
|
#include <xxhash.h>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "core/libraries/videoout/buffer.h"
|
|
||||||
#include "core/virtual_memory.h"
|
#include "core/virtual_memory.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/texture_cache/texture_cache.h"
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
|
@ -137,6 +136,21 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) {
|
||||||
return image;
|
return image;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
    // Resolve the image that backs this descriptor.
    Image& backing = FindImage(ImageInfo{desc}, desc.Address());
    const ImageViewInfo wanted{desc};

    // Reuse a view the image already tracks for identical view properties.
    if (const ImageViewId cached_id = backing.FindView(wanted)) {
        return slot_image_views[cached_id];
    }

    // No match: create the view and record it on the image so that the next
    // request with the same properties takes the branch above.
    const ImageViewId created_id =
        slot_image_views.insert(instance, scheduler, wanted, backing.image);
    backing.image_view_infos.emplace_back(wanted);
    backing.image_view_ids.emplace_back(created_id);
    return slot_image_views[created_id];
}
|
||||||
|
|
||||||
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) {
|
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) {
|
||||||
const ImageInfo info{buffer};
|
const ImageInfo info{buffer};
|
||||||
auto& image = FindImage(info, buffer.Address());
|
auto& image = FindImage(info, buffer.Address());
|
||||||
|
@ -159,7 +173,7 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
image.flags &= ~ImageFlagBits::CpuModified;
|
image.flags &= ~ImageFlagBits::CpuModified;
|
||||||
|
|
||||||
// Upload data to the staging buffer.
|
// Upload data to the staging buffer.
|
||||||
const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 0);
|
const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4);
|
||||||
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
|
||||||
if (image.info.is_tiled) {
|
if (image.info.is_tiled) {
|
||||||
ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
|
ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
|
||||||
|
@ -202,6 +216,12 @@ void TextureCache::RefreshImage(Image& image) {
|
||||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
    // The cache key is a 64-bit hash over the raw bytes of the descriptor.
    // NOTE(review): only the hash is stored as the key, so two different
    // descriptors that hash equally would share one sampler - confirm this is acceptable.
    const u64 key = XXH3_64bits(&sampler, sizeof(sampler));

    // try_emplace constructs a new Sampler only when the key is not cached yet.
    const auto entry = samplers.try_emplace(key, instance, sampler).first;
    return entry->second.Handle();
}
|
||||||
|
|
||||||
void TextureCache::RegisterImage(ImageId image_id) {
|
void TextureCache::RegisterImage(ImageId image_id) {
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
|
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
|
||||||
|
|
|
@ -7,9 +7,11 @@
|
||||||
#include <boost/icl/interval_map.hpp>
|
#include <boost/icl/interval_map.hpp>
|
||||||
#include <tsl/robin_map.h>
|
#include <tsl/robin_map.h>
|
||||||
|
|
||||||
|
#include "video_core/amdgpu/resource.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
#include "video_core/texture_cache/image.h"
|
#include "video_core/texture_cache/image.h"
|
||||||
#include "video_core/texture_cache/image_view.h"
|
#include "video_core/texture_cache/image_view.h"
|
||||||
|
#include "video_core/texture_cache/sampler.h"
|
||||||
#include "video_core/texture_cache/slot_vector.h"
|
#include "video_core/texture_cache/slot_vector.h"
|
||||||
|
|
||||||
namespace Core::Libraries::VideoOut {
|
namespace Core::Libraries::VideoOut {
|
||||||
|
@ -36,12 +38,18 @@ public:
|
||||||
/// Retrieves the image handle of the image with the provided attributes and address.
|
/// Retrieves the image handle of the image with the provided attributes and address.
|
||||||
Image& FindImage(const ImageInfo& info, VAddr cpu_address);
|
Image& FindImage(const ImageInfo& info, VAddr cpu_address);
|
||||||
|
|
||||||
|
/// Retrieves an image view with the properties of the specified image descriptor.
|
||||||
|
ImageView& FindImageView(const AmdGpu::Image& image);
|
||||||
|
|
||||||
/// Retrieves the render target with specified properties
|
/// Retrieves the render target with specified properties
|
||||||
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer);
|
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer);
|
||||||
|
|
||||||
/// Reuploads image contents.
|
/// Reuploads image contents.
|
||||||
void RefreshImage(Image& image);
|
void RefreshImage(Image& image);
|
||||||
|
|
||||||
|
/// Retrieves the sampler that matches the provided S# descriptor.
|
||||||
|
vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Iterate over all page indices in a range
|
/// Iterate over all page indices in a range
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
|
@ -121,6 +129,7 @@ private:
|
||||||
Vulkan::StreamBuffer staging;
|
Vulkan::StreamBuffer staging;
|
||||||
SlotVector<Image> slot_images;
|
SlotVector<Image> slot_images;
|
||||||
SlotVector<ImageView> slot_image_views;
|
SlotVector<ImageView> slot_image_views;
|
||||||
|
tsl::robin_map<u64, Sampler> samplers;
|
||||||
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
|
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
|
||||||
boost::icl::interval_map<VAddr, s32> cached_pages;
|
boost::icl::interval_map<VAddr, s32> cached_pages;
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include "common/assert.h"
|
||||||
#include "video_core/texture_cache/tile_manager.h"
|
#include "video_core/texture_cache/tile_manager.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
Loading…
Reference in New Issue