video_core: Crucial buffer cache fixes + proper GPU clears (#414)
* translator: Use templates for stronger type guarantees
* spirv: Define buffer offsets upfront
  * Saves a lot of shader instructions
* buffer_cache: Use dynamic vertex input when available
  * Fixes issues when games like Dark Souls rebind vertex buffers with a different stride
* externals: Update boost
* spirv: Use runtime array for ssbos
  * ssbos can be large and typically their size will vary, especially in generic copy/clear cs shaders
* fs: Lock when doing case insensitive search
  * Dark Souls does fs lookups from different threads
* texture_cache: More precise invalidation from compute
  * Fixes unrelated render targets being cleared
* texture_cache: Use hashes to protect GPU-modified images from reupload
* translator: Treat V_CNDMASK as float
  * Sometimes it can have input modifiers. The worst this will cause is some extra calls to uintBitsToFloat and the opposite, but most often this is used as float anyway
* translator: Small optimization for V_SAD_U32
* Fix review
* clang format
Parent: dfcfd62d4f
Commit: 1fb0da9b89
@@ -1 +1 @@
-Subproject commit 147b2de7734f5dc3b9aeb1f4135ae15fcd44b9d7
+Subproject commit a04136add1e469f46d8ae8d3e8307779240a5c53
@@ -54,6 +54,7 @@ std::filesystem::path MntPoints::GetHostPath(const std::string& guest_directory)
 
     // If the path does not exist attempt to verify this.
     // Retrieve parent path until we find one that exists.
+    std::scoped_lock lk{m_mutex};
     path_parts.clear();
    auto current_path = host_path;
    while (!std::filesystem::exists(current_path)) {
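Note: the added std::scoped_lock matters because GetHostPath reuses member state (path_parts) as scratch space while it walks the directory tree for a case-insensitive match, and, per the commit message, Dark Souls issues these lookups from several threads at once. A simplified, self-contained sketch of the hazard and the fix; the class below is an illustrative stand-in, not the emulator's MntPoints:

#include <filesystem>
#include <mutex>
#include <vector>

class PathResolver {
public:
    std::filesystem::path Resolve(const std::filesystem::path& host_path) {
        std::scoped_lock lk{m_mutex}; // serialize use of the shared scratch vector
        path_parts.clear();
        auto current_path = host_path;
        while (!current_path.empty() && current_path != current_path.parent_path() &&
               !std::filesystem::exists(current_path)) {
            path_parts.push_back(current_path.filename());
            current_path = current_path.parent_path();
        }
        return current_path; // deepest ancestor that exists (possibly empty)
    }

private:
    std::mutex m_mutex;
    std::vector<std::filesystem::path> path_parts; // shared across calls
};

int main() {
    PathResolver resolver;
    return resolver.Resolve(std::filesystem::current_path()).empty() ? 1 : 0;
}

Without the lock, two concurrent calls interleave their clear()/push_back() sequences on the shared vector and both resolve garbage paths.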
@@ -9,7 +9,6 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/libraries/error_codes.h"
-#include "core/libraries/kernel/thread_management.h"
 #include "core/libraries/libs.h"
 
 namespace Libraries::Kernel {
@@ -82,7 +81,6 @@ public:
 
 public:
     struct WaitingThread : public ListBaseHook {
-        std::string name;
         std::condition_variable cv;
         u32 priority;
         s32 need_count;
@@ -90,7 +88,6 @@ public:
         bool was_cancled{};
 
         explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} {
-            name = scePthreadSelf()->name;
            if (is_fifo) {
                return;
            }
@@ -128,11 +128,7 @@ Id EmitReadConst(EmitContext& ctx) {
 
 Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     auto& buffer = ctx.buffers[handle];
-    if (!Sirit::ValidId(buffer.offset)) {
-        buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
-    }
-    const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))};
-    index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords);
+    index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
     const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
     return ctx.OpLoad(buffer.data_types->Get(1), ptr);
 }
@@ -229,9 +225,6 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 template <u32 N>
 static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
     auto& buffer = ctx.buffers[handle];
-    if (!Sirit::ValidId(buffer.offset)) {
-        buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
-    }
     address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     if constexpr (N == 1) {
@@ -404,9 +397,6 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
 template <u32 N>
 static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     auto& buffer = ctx.buffers[handle];
-    if (!Sirit::ValidId(buffer.offset)) {
-        buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
-    }
     address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
     if constexpr (N == 1) {
         return GetBufferFormatValue(ctx, handle, address, 0);
@@ -438,9 +428,6 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 template <u32 N>
 static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
     auto& buffer = ctx.buffers[handle];
-    if (!Sirit::ValidId(buffer.offset)) {
-        buffer.offset = ctx.GetBufferOffset(buffer.global_binding);
-    }
     address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     if constexpr (N == 1) {
@@ -6,7 +6,9 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitPrologue(EmitContext& ctx) {}
+void EmitPrologue(EmitContext& ctx) {
+    ctx.DefineBufferOffsets();
+}
 
 void EmitEpilogue(EmitContext& ctx) {}
 
@@ -165,14 +165,18 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt) {
     throw InvalidArgument("Invalid attribute type {}", fmt);
 }
 
-Id EmitContext::GetBufferOffset(u32 binding) {
-    const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
-    const u32 comp = (binding & 0xf) >> 2;
-    const u32 offset = (binding & 0x3) << 3;
-    const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
-                               push_data_block, ConstU32(half), ConstU32(comp))};
-    const Id value{OpLoad(U32[1], ptr)};
-    return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
+void EmitContext::DefineBufferOffsets() {
+    for (auto& buffer : buffers) {
+        const u32 binding = buffer.binding;
+        const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
+        const u32 comp = (binding & 0xf) >> 2;
+        const u32 offset = (binding & 0x3) << 3;
+        const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
+                                   push_data_block, ConstU32(half), ConstU32(comp))};
+        const Id value{OpLoad(U32[1], ptr)};
+        buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
+        buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
+    }
 }
 
 Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
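Note: the byte offsets that DefineBufferOffsets extracts are packed by the CPU side four-to-a-32-bit-word inside the push-constant block; `half` picks the array element, `comp` the component, and the bit-field extract pulls out one 8-bit offset. A host-side model of the same packing, with flattened word indexing (array size and helper names are illustrative):

#include <array>
#include <cstdint>

struct PushDataModel {
    std::array<std::uint32_t, 8> words{};

    void SetOffset(std::uint32_t binding, std::uint8_t offset) {
        const std::uint32_t word = binding >> 2;        // 4 offsets per u32
        const std::uint32_t shift = (binding & 3) << 3; // 8 bits each
        words[word] &= ~(0xFFu << shift);
        words[word] |= std::uint32_t(offset) << shift;
    }

    std::uint8_t GetOffset(std::uint32_t binding) const {
        // Mirrors OpBitFieldUExtract(value, (binding & 0x3) << 3, 8) above.
        return (words[binding >> 2] >> ((binding & 3) << 3)) & 0xFF;
    }
};

int main() {
    PushDataModel push{};
    push.SetOffset(5, 64);
    return push.GetOffset(5) == 64 ? 0 : 1;
}

Reading every offset once in EmitPrologue, instead of re-deriving it on each buffer access as GetBufferOffset did, is where the commit's "saves a lot of shader instructions" claim comes from.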
@@ -327,7 +331,9 @@ void EmitContext::DefineBuffers() {
     for (u32 i = 0; const auto& buffer : info.buffers) {
         const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
         const Id data_type = (*data_types)[1];
-        const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))};
+        const Id record_array_type{buffer.is_storage
+                                       ? TypeRuntimeArray(data_type)
+                                       : TypeArray(data_type, ConstU32(buffer.length))};
         const Id struct_type{TypeStruct(record_array_type)};
         if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
             Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
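Note: for storage buffers the element count is no longer baked into the SPIR-V type. A standalone sketch of the distinction using the Sirit emitter the recompiler is built on; the constants are illustrative and this is not the emulator's exact code:

#include <sirit/sirit.h>

int main() {
    Sirit::Module module{};
    const auto u32_type = module.TypeInt(32, false);
    const auto f32_type = module.TypeFloat(32);
    // Fixed array: one concrete length is part of the type.
    const auto fixed = module.TypeArray(f32_type, module.Constant(u32_type, 256u));
    // Runtime array (OpTypeRuntimeArray): length comes from the bound descriptor.
    const auto runtime = module.TypeRuntimeArray(f32_type);
    module.Decorate(fixed, spv::Decoration::ArrayStride, 4U);
    module.Decorate(runtime, spv::Decoration::ArrayStride, 4U);
    return 0;
}

Because a runtime array's length is only known from the descriptor, one generic copy/clear compute shader can be reused for SSBOs of any size, which is exactly the case the commit message calls out.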
@@ -354,7 +360,7 @@ void EmitContext::DefineBuffers() {
 
         buffers.push_back({
             .id = id,
-            .global_binding = binding++,
+            .binding = binding++,
             .data_types = data_types,
             .pointer_type = pointer_type,
             .buffer = buffer.GetVsharp(info),
@@ -40,7 +40,7 @@ public:
     ~EmitContext();
 
     Id Def(const IR::Value& value);
-    Id GetBufferOffset(u32 binding);
+    void DefineBufferOffsets();
 
     [[nodiscard]] Id DefineInput(Id type, u32 location) {
         const Id input_id{DefineVar(type, spv::StorageClass::Input)};
@@ -203,7 +203,8 @@ public:
     struct BufferDefinition {
         Id id;
         Id offset;
-        u32 global_binding;
+        Id offset_dwords;
+        u32 binding;
         const VectorIds* data_types;
         Id pointer_type;
         AmdGpu::Buffer buffer;
@@ -73,101 +73,190 @@ void Translator::EmitPrologue() {
     }
 }
 
-template <>
-IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
-    IR::U32F32 value{};
-
-    const bool is_float = operand.type == ScalarType::Float32 || force_flt;
-    switch (operand.field) {
-    case OperandField::ScalarGPR:
-        if (is_float) {
-            value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
-        } else {
-            value = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
-        }
-        break;
-    case OperandField::VectorGPR:
-        if (is_float) {
-            value = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
-        } else {
-            value = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
-        }
-        break;
-    case OperandField::ConstZero:
-        if (is_float) {
-            value = ir.Imm32(0.f);
-        } else {
-            value = ir.Imm32(0U);
-        }
-        break;
-    case OperandField::SignedConstIntPos:
-        ASSERT(!force_flt);
-        value = ir.Imm32(operand.code - SignedConstIntPosMin + 1);
-        break;
-    case OperandField::SignedConstIntNeg:
-        ASSERT(!force_flt);
-        value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
-        break;
-    case OperandField::LiteralConst:
-        if (is_float) {
-            value = ir.Imm32(std::bit_cast<float>(operand.code));
-        } else {
-            value = ir.Imm32(operand.code);
-        }
-        break;
-    case OperandField::ConstFloatPos_1_0:
-        if (is_float) {
-            value = ir.Imm32(1.f);
-        } else {
-            value = ir.Imm32(std::bit_cast<u32>(1.f));
-        }
-        break;
-    case OperandField::ConstFloatPos_0_5:
-        value = ir.Imm32(0.5f);
-        break;
-    case OperandField::ConstFloatPos_2_0:
-        value = ir.Imm32(2.0f);
-        break;
-    case OperandField::ConstFloatPos_4_0:
-        value = ir.Imm32(4.0f);
-        break;
-    case OperandField::ConstFloatNeg_0_5:
-        value = ir.Imm32(-0.5f);
-        break;
-    case OperandField::ConstFloatNeg_1_0:
-        if (is_float) {
-            value = ir.Imm32(-1.0f);
-        } else {
-            value = ir.Imm32(std::bit_cast<u32>(-1.0f));
-        }
-        break;
-    case OperandField::ConstFloatNeg_2_0:
-        value = ir.Imm32(-2.0f);
-        break;
-    case OperandField::ConstFloatNeg_4_0:
-        value = ir.Imm32(-4.0f);
-        break;
-    case OperandField::VccLo:
-        if (force_flt) {
-            value = ir.BitCast<IR::F32>(ir.GetVccLo());
-        } else {
-            value = ir.GetVccLo();
-        }
-        break;
-    case OperandField::VccHi:
-        if (force_flt) {
-            value = ir.BitCast<IR::F32>(ir.GetVccHi());
-        } else {
-            value = ir.GetVccHi();
-        }
-        break;
-    case OperandField::M0:
-        return m0_value;
-    default:
-        UNREACHABLE();
-    }
-
-    if (is_float) {
+template <typename T>
+T Translator::GetSrc(const InstOperand& operand) {
+    constexpr bool is_float = std::is_same_v<T, IR::F32>;
+
+    const auto get_imm = [&](auto value) -> T {
+        if constexpr (is_float) {
+            return ir.Imm32(std::bit_cast<float>(value));
+        } else {
+            return ir.Imm32(std::bit_cast<u32>(value));
+        }
+    };
+
+    T value{};
+    switch (operand.field) {
+    case OperandField::ScalarGPR:
+        value = ir.GetScalarReg<T>(IR::ScalarReg(operand.code));
+        break;
+    case OperandField::VectorGPR:
+        value = ir.GetVectorReg<T>(IR::VectorReg(operand.code));
+        break;
+    case OperandField::ConstZero:
+        value = get_imm(0U);
+        break;
+    case OperandField::SignedConstIntPos:
+        value = get_imm(operand.code - SignedConstIntPosMin + 1);
+        break;
+    case OperandField::SignedConstIntNeg:
+        value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1);
+        break;
+    case OperandField::LiteralConst:
+        value = get_imm(operand.code);
+        break;
+    case OperandField::ConstFloatPos_1_0:
+        value = get_imm(1.f);
+        break;
+    case OperandField::ConstFloatPos_0_5:
+        value = get_imm(0.5f);
+        break;
+    case OperandField::ConstFloatPos_2_0:
+        value = get_imm(2.0f);
+        break;
+    case OperandField::ConstFloatPos_4_0:
+        value = get_imm(4.0f);
+        break;
+    case OperandField::ConstFloatNeg_0_5:
+        value = get_imm(-0.5f);
+        break;
+    case OperandField::ConstFloatNeg_1_0:
+        value = get_imm(-1.0f);
+        break;
+    case OperandField::ConstFloatNeg_2_0:
+        value = get_imm(-2.0f);
+        break;
+    case OperandField::ConstFloatNeg_4_0:
+        value = get_imm(-4.0f);
+        break;
+    case OperandField::VccLo:
+        if constexpr (is_float) {
+            value = ir.BitCast<IR::F32>(ir.GetVccLo());
+        } else {
+            value = ir.GetVccLo();
+        }
+        break;
+    case OperandField::VccHi:
+        if constexpr (is_float) {
+            value = ir.BitCast<IR::F32>(ir.GetVccHi());
+        } else {
+            value = ir.GetVccHi();
+        }
+        break;
+    case OperandField::M0:
+        if constexpr (is_float) {
+            UNREACHABLE();
+        } else {
+            return m0_value;
+        }
+    default:
+        UNREACHABLE();
+    }
+
+    if constexpr (is_float) {
+        if (operand.input_modifier.abs) {
+            value = ir.FPAbs(value);
+        }
+        if (operand.input_modifier.neg) {
+            value = ir.FPNeg(value);
+        }
+    } else {
+        if (operand.input_modifier.abs) {
+            UNREACHABLE();
+        }
+        if (operand.input_modifier.neg) {
+            UNREACHABLE();
+        }
+    }
+    return value;
+}
+
+template IR::U32 Translator::GetSrc<IR::U32>(const InstOperand&);
+template IR::F32 Translator::GetSrc<IR::F32>(const InstOperand&);
+
+template <typename T>
+T Translator::GetSrc64(const InstOperand& operand) {
+    constexpr bool is_float = std::is_same_v<T, IR::F64>;
+
+    const auto get_imm = [&](auto value) -> T {
+        if constexpr (is_float) {
+            return ir.Imm64(std::bit_cast<double>(value));
+        } else {
+            return ir.Imm64(std::bit_cast<u64>(value));
+        }
+    };
+
+    T value{};
+    switch (operand.field) {
+    case OperandField::ScalarGPR: {
+        const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code));
+        const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1));
+        if constexpr (is_float) {
+            UNREACHABLE();
+        } else {
+            value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
+        }
+        break;
+    }
+    case OperandField::VectorGPR: {
+        const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code));
+        const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1));
+        if constexpr (is_float) {
+            UNREACHABLE();
+        } else {
+            value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
+        }
+        break;
+    }
+    case OperandField::ConstZero:
+        value = get_imm(0ULL);
+        break;
+    case OperandField::SignedConstIntPos:
+        value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1);
+        break;
+    case OperandField::SignedConstIntNeg:
+        value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1);
+        break;
+    case OperandField::LiteralConst:
+        value = get_imm(u64(operand.code));
+        break;
+    case OperandField::ConstFloatPos_1_0:
+        value = get_imm(1.0);
+        break;
+    case OperandField::ConstFloatPos_0_5:
+        value = get_imm(0.5);
+        break;
+    case OperandField::ConstFloatPos_2_0:
+        value = get_imm(2.0);
+        break;
+    case OperandField::ConstFloatPos_4_0:
+        value = get_imm(4.0);
+        break;
+    case OperandField::ConstFloatNeg_0_5:
+        value = get_imm(-0.5);
+        break;
+    case OperandField::ConstFloatNeg_1_0:
+        value = get_imm(-1.0);
+        break;
+    case OperandField::ConstFloatNeg_2_0:
+        value = get_imm(-2.0);
+        break;
+    case OperandField::ConstFloatNeg_4_0:
+        value = get_imm(-4.0);
+        break;
+    case OperandField::VccLo:
+        if constexpr (is_float) {
+            UNREACHABLE();
+        } else {
+            value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi()));
+        }
+        break;
+    case OperandField::VccHi:
+    default:
+        UNREACHABLE();
+    }
+
+    if constexpr (is_float) {
         if (operand.input_modifier.abs) {
             value = ir.FPAbs(value);
@@ -178,148 +267,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
     return value;
 }
 
-template <>
-IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
-    return GetSrc<IR::U32F32>(operand, force_flt);
-}
-
-template <>
-IR::F32 Translator::GetSrc(const InstOperand& operand, bool) {
-    return GetSrc<IR::U32F32>(operand, true);
-}
-
-template <>
-IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
-    IR::Value value_hi{};
-    IR::Value value_lo{};
-
-    bool immediate = false;
-    const bool is_float = operand.type == ScalarType::Float64 || force_flt;
-    switch (operand.field) {
-    case OperandField::ScalarGPR:
-        if (is_float) {
-            value_lo = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
-            value_hi = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code + 1));
-        } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
-            value_lo = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
-            value_hi = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code + 1));
-        } else {
-            UNREACHABLE();
-        }
-        break;
-    case OperandField::VectorGPR:
-        if (is_float) {
-            value_lo = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
-            value_hi = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code + 1));
-        } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) {
-            value_lo = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
-            value_hi = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code + 1));
-        } else {
-            UNREACHABLE();
-        }
-        break;
-    case OperandField::ConstZero:
-        immediate = true;
-        if (force_flt) {
-            value_lo = ir.Imm64(0.0);
-        } else {
-            value_lo = ir.Imm64(u64(0U));
-        }
-        break;
-    case OperandField::SignedConstIntPos:
-        ASSERT(!force_flt);
-        immediate = true;
-        value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1);
-        break;
-    case OperandField::SignedConstIntNeg:
-        ASSERT(!force_flt);
-        immediate = true;
-        value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1);
-        break;
-    case OperandField::LiteralConst:
-        immediate = true;
-        if (force_flt) {
-            UNREACHABLE(); // There is a literal double?
-        } else {
-            value_lo = ir.Imm64(u64(operand.code));
-        }
-        break;
-    case OperandField::ConstFloatPos_1_0:
-        immediate = true;
-        if (force_flt) {
-            value_lo = ir.Imm64(1.0);
-        } else {
-            value_lo = ir.Imm64(std::bit_cast<u64>(f64(1.0)));
-        }
-        break;
-    case OperandField::ConstFloatPos_0_5:
-        immediate = true;
-        value_lo = ir.Imm64(0.5);
-        break;
-    case OperandField::ConstFloatPos_2_0:
-        immediate = true;
-        value_lo = ir.Imm64(2.0);
-        break;
-    case OperandField::ConstFloatPos_4_0:
-        immediate = true;
-        value_lo = ir.Imm64(4.0);
-        break;
-    case OperandField::ConstFloatNeg_0_5:
-        immediate = true;
-        value_lo = ir.Imm64(-0.5);
-        break;
-    case OperandField::ConstFloatNeg_1_0:
-        immediate = true;
-        value_lo = ir.Imm64(-1.0);
-        break;
-    case OperandField::ConstFloatNeg_2_0:
-        immediate = true;
-        value_lo = ir.Imm64(-2.0);
-        break;
-    case OperandField::ConstFloatNeg_4_0:
-        immediate = true;
-        value_lo = ir.Imm64(-4.0);
-        break;
-    case OperandField::VccLo: {
-        value_lo = ir.GetVccLo();
-        value_hi = ir.GetVccHi();
-    } break;
-    case OperandField::VccHi:
-        UNREACHABLE();
-    default:
-        UNREACHABLE();
-    }
-
-    IR::Value value;
-
-    if (immediate) {
-        value = value_lo;
-    } else if (is_float) {
-        throw NotImplementedException("required OpPackDouble2x32 implementation");
-    } else {
-        IR::Value packed = ir.CompositeConstruct(value_lo, value_hi);
-        value = ir.PackUint2x32(packed);
-    }
-
-    if (is_float) {
-        if (operand.input_modifier.abs) {
-            value = ir.FPAbs(IR::F32F64(value));
-        }
-        if (operand.input_modifier.neg) {
-            value = ir.FPNeg(IR::F32F64(value));
-        }
-    }
-    return IR::U64F64(value);
-}
-
-template <>
-IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) {
-    return GetSrc64<IR::U64F64>(operand, force_flt);
-}
-template <>
-IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) {
-    return GetSrc64<IR::U64F64>(operand, true);
-}
+template IR::U64 Translator::GetSrc64<IR::U64>(const InstOperand&);
+template IR::F64 Translator::GetSrc64<IR::F64>(const InstOperand&);
 
 void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
     IR::U32F32 result = value;
@@ -211,10 +211,10 @@ public:
     void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
 
 private:
-    template <typename T = IR::U32F32>
-    [[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false);
-    template <typename T = IR::U64F64>
-    [[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false);
+    template <typename T = IR::U32>
+    [[nodiscard]] T GetSrc(const InstOperand& operand);
+    template <typename T = IR::U64>
+    [[nodiscard]] T GetSrc64(const InstOperand& operand);
     void SetDst(const InstOperand& operand, const IR::U32F32& value);
     void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
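Note on this signature change: the old force_flt flag selected float or integer reads at runtime and was easy to pass incorrectly at a call site; a template parameter makes the choice part of the call's type. A self-contained analogy of the if constexpr dispatch the new GetSrc uses (the value types here are stand-ins, not the emulator's IR):

#include <bit>
#include <cstdint>
#include <type_traits>

struct FloatVal { float v; };
struct UintVal { std::uint32_t v; };

// The template parameter decides at compile time how a raw immediate is
// materialized, mirroring get_imm in the new GetSrc: a mismatched use is now
// a type error instead of a silently wrong runtime flag.
template <typename T>
T MakeImm(std::uint32_t raw) {
    if constexpr (std::is_same_v<T, FloatVal>) {
        return FloatVal{std::bit_cast<float>(raw)};
    } else {
        return UintVal{raw};
    }
}

int main() {
    const FloatVal one = MakeImm<FloatVal>(0x3f800000); // bit pattern of 1.0f
    const UintVal answer = MakeImm<UintVal>(42);
    return (one.v == 1.0f && answer.v == 42) ? 0 : 1;
}

The explicit instantiations for IR::U32/IR::F32 (and IR::U64/IR::F64 for GetSrc64) let the template definitions stay in the .cpp file.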
@@ -2,7 +2,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "shader_recompiler/frontend/translate/translate.h"
-#include "shader_recompiler/profile.h"
 
 namespace Shader::Gcn {
 
@@ -312,7 +311,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
 }
 
 void Translator::V_MOV(const GcnInst& inst) {
-    SetDst(inst.dst[0], GetSrc(inst.src[0]));
+    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
 }
 
 void Translator::V_SAD(const GcnInst& inst) {
@@ -321,14 +320,14 @@ void Translator::V_SAD(const GcnInst& inst) {
 }
 
 void Translator::V_MAC_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true),
-                                 GetSrc(inst.dst[0], true)));
+    SetDst(inst.dst[0], ir.FPFma(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]),
+                                 GetSrc<IR::F32>(inst.dst[0])));
 }
 
 void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
     const IR::VectorReg dst_reg{inst.dst[0].code};
     const IR::Value vec_f32 =
-        ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true));
+        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
     ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
 }
 
@@ -339,13 +338,13 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) {
 }
 
 void Translator::V_CVT_F16_F32(const GcnInst& inst) {
-    const IR::F32 src0 = GetSrc(inst.src[0], true);
+    const IR::F32 src0 = GetSrc<IR::F32>(inst.src[0]);
     const IR::F16 src0fp16 = ir.FPConvert(16, src0);
     SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast<IR::U16>(src0fp16)));
 }
 
 void Translator::V_MUL_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
+    SetDst(inst.dst[0], ir.FPMul(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1])));
 }
 
 void Translator::V_CNDMASK_B32(const GcnInst& inst) {
@@ -354,24 +353,8 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
     const IR::U1 flag = inst.src[2].field == OperandField::ScalarGPR
                             ? ir.GetThreadBitScalarReg(flag_reg)
                             : ir.GetVcc();
-    // We can treat the instruction as integer most of the time, but when a source is
-    // a floating point constant we will force the other as float for better readability
-    // The other operand is also higly likely to be float as well.
-    const auto is_float_const = [](OperandField field) {
-        return field >= OperandField::ConstFloatPos_0_5 &&
-               field <= OperandField::ConstFloatNeg_4_0;
-    };
-    const bool has_flt_source =
-        is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
-    IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
-    IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
-    if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
-        src1 = ir.BitCast<IR::F32, IR::U32>(src1);
-    }
-    if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
-        src0 = ir.BitCast<IR::F32, IR::U32>(src0);
-    }
-    const IR::Value result = ir.Select(flag, src1, src0);
+    const IR::Value result =
+        ir.Select(flag, GetSrc<IR::F32>(inst.src[1]), GetSrc<IR::F32>(inst.src[0]));
     ir.SetVectorReg(dst_reg, IR::U32F32{result});
 }
 
@@ -448,21 +431,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) {
 }
 
 void Translator::V_MAD_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
-    const IR::F32 src2{GetSrc(inst.src[2], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
     SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
 }
 
 void Translator::V_FRACT_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     const IR::VectorReg dst_reg{inst.dst[0].code};
     ir.SetVectorReg(dst_reg, ir.Fract(src0));
 }
 
 void Translator::V_ADD_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
     SetDst(inst.dst[0], ir.FPAdd(src0, src1));
 }
 
@@ -476,9 +459,9 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
 }
 
 void Translator::V_MED3_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
-    const IR::F32 src2{GetSrc(inst.src[2], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
     const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
     SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
 }
@@ -492,32 +475,32 @@ void Translator::V_MED3_I32(const GcnInst& inst) {
 }
 
 void Translator::V_FLOOR_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     const IR::VectorReg dst_reg{inst.dst[0].code};
     ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
 }
 
 void Translator::V_SUB_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
     SetDst(inst.dst[0], ir.FPSub(src0, src1));
 }
 
 void Translator::V_RCP_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPRecip(src0));
 }
 
 void Translator::V_FMA_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
-    const IR::F32 src2{GetSrc(inst.src[2], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
     SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
 }
 
 void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
     const IR::U1 result = [&] {
         switch (op) {
         case ConditionOp::F:
@@ -557,8 +540,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
 }
 
 void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
     SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy));
 }
 
@@ -569,40 +552,40 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
 }
 
 void Translator::V_RSQ_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
 }
 
 void Translator::V_SIN_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPSin(src0));
 }
 
 void Translator::V_LOG_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPLog2(src0));
 }
 
 void Translator::V_EXP_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPExp2(src0));
 }
 
 void Translator::V_SQRT_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPSqrt(src0));
 }
 
 void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
     SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy));
 }
 
 void Translator::V_MIN3_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
-    const IR::F32 src2{GetSrc(inst.src[2], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
     SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
 }
 
@@ -614,9 +597,9 @@ void Translator::V_MIN3_I32(const GcnInst& inst) {
 }
 
 void Translator::V_MADMK_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
-    const IR::F32 k{GetSrc(inst.src[2], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::F32 k{GetSrc<IR::F32>(inst.src[2])};
     SetDst(inst.dst[0], ir.FPFma(src0, k, src1));
 }
 
@@ -625,25 +608,25 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
 }
 
 void Translator::V_CUBESC_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], GetSrc(inst.src[0], true));
+    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
 }
 
 void Translator::V_CUBETC_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], GetSrc(inst.src[1], true));
+    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1]));
 }
 
 void Translator::V_CUBEID_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], GetSrc(inst.src[2], true));
+    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2]));
 }
 
 void Translator::V_CVT_U32_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.ConvertFToU(32, src0));
 }
 
 void Translator::V_SUBREV_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
     SetDst(inst.dst[0], ir.FPSub(src1, src0));
 }
 
@@ -727,9 +710,17 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
     const IR::U32 src2{GetSrc(inst.src[2])};
-    const IR::U32 max{ir.IMax(src0, src1, false)};
-    const IR::U32 min{ir.IMin(src0, src1, false)};
-    SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2));
+    IR::U32 result;
+    if (src0.IsImmediate() && src0.U32() == 0U) {
+        result = src1;
+    } else if (src1.IsImmediate() && src1.U32() == 0U) {
+        result = src0;
+    } else {
+        const IR::U32 max{ir.IMax(src0, src1, false)};
+        const IR::U32 min{ir.IMin(src0, src1, false)};
+        result = ir.ISub(max, min);
+    }
+    SetDst(inst.dst[0], ir.IAdd(result, src2));
 }
 
 void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
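Note: V_SAD_U32 computes |src0 - src1| + src2, and the max/min pair is how the unsigned absolute difference is formed without underflow. When either input is a literal zero the difference is simply the other operand, so the max/min can be skipped entirely. A plain-integer model of the lowering (illustrative names, not the emulator's API):

#include <algorithm>
#include <cstdint>

// |a - b| + c, with the commit's shortcut for a known-zero operand.
std::uint32_t SadU32(std::uint32_t a, std::uint32_t b, std::uint32_t c) {
    if (a == 0) return b + c; // |0 - b| == b
    if (b == 0) return a + c; // |a - 0| == a
    return (std::max(a, b) - std::min(a, b)) + c;
}

int main() {
    return (SadU32(7, 3, 1) == 5 && SadU32(0, 9, 2) == 11) ? 0 : 1;
}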
@@ -783,7 +774,7 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
 }
 
 void Translator::V_RNDNE_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPRoundEven(src0));
 }
 
@@ -794,14 +785,14 @@ void Translator::V_BCNT_U32_B32(const GcnInst& inst) {
 }
 
 void Translator::V_COS_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPCos(src0));
 }
 
 void Translator::V_MAX3_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
-    const IR::F32 src1{GetSrc(inst.src[1], true)};
-    const IR::F32 src2{GetSrc(inst.src[2], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::F32 src2{GetSrc<IR::F32>(inst.src[2])};
     SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
 }
 
@@ -813,7 +804,7 @@ void Translator::V_MAX3_U32(const GcnInst& inst) {
 }
 
 void Translator::V_CVT_I32_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
 }
 
@@ -830,12 +821,12 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) {
 }
 
 void Translator::V_TRUNC_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPTrunc(src0));
 }
 
 void Translator::V_CEIL_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.FPCeil(src0));
 }
 
@@ -899,18 +890,18 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
 }
 
 void Translator::V_LDEXP_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
     SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
 }
 
 void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
 }
 
 void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
-    const IR::F32F64 src0{GetSrc(inst.src[0])};
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
     IR::U1 value;
     if (src1.IsImmediate()) {
@@ -87,6 +87,15 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
 }
 
 bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
+    boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
+    boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
+    SCOPE_EXIT {
+        if (instance.IsVertexInputDynamicState()) {
+            const auto cmdbuf = scheduler.CommandBuffer();
+            cmdbuf.setVertexInputEXT(bindings, attributes);
+        }
+    };
+
     if (vs_info.vs_inputs.empty()) {
         return false;
     }
@@ -122,6 +131,21 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
         }
         guest_buffers.emplace_back(buffer);
         ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
+        attributes.push_back({
+            .location = input.binding,
+            .binding = input.binding,
+            .format =
+                Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
+            .offset = 0,
+        });
+        bindings.push_back({
+            .binding = input.binding,
+            .stride = buffer.GetStride(),
+            .inputRate = input.instance_step_rate == Shader::Info::VsInput::None
+                             ? vk::VertexInputRate::eVertex
+                             : vk::VertexInputRate::eInstance,
+            .divisor = 1,
+        });
     }
 
     std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
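Note: with VK_EXT_vertex_input_dynamic_state the stride travels in the per-draw binding description instead of the baked pipeline state, so a game rebinding the same shaders with a different vertex stride (the Dark Souls issue from the commit message) no longer forces a different pipeline object. A minimal vulkan.hpp sketch, assuming the extension is enabled and the bound pipeline lists vk::DynamicState::eVertexInputEXT; this is not the emulator's exact code:

#define VULKAN_HPP_NO_CONSTRUCTORS // allow designated initializers, as the project does
#include <vulkan/vulkan.hpp>

#include <cstdint>

// Stride is supplied while recording, so changing it needs no pipeline rebuild.
void SetVertexLayout(vk::CommandBuffer cmdbuf, std::uint32_t stride) {
    const vk::VertexInputBindingDescription2EXT binding{
        .binding = 0,
        .stride = stride,
        .inputRate = vk::VertexInputRate::eVertex,
        .divisor = 1,
    };
    const vk::VertexInputAttributeDescription2EXT attribute{
        .location = 0,
        .binding = 0,
        .format = vk::Format::eR32G32B32Sfloat,
        .offset = 0,
    };
    cmdbuf.setVertexInputEXT(1, &binding, 1, &attribute);
}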
@@ -224,6 +248,19 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written) {
     return {&buffer, buffer.Offset(device_addr)};
 }
 
+std::pair<const Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
+    const u64 page = gpu_addr >> CACHING_PAGEBITS;
+    const BufferId buffer_id = page_table[page];
+    if (buffer_id) {
+        const Buffer& buffer = slot_buffers[buffer_id];
+        if (buffer.IsInBounds(gpu_addr, size)) {
+            return {&buffer, buffer.Offset(gpu_addr)};
+        }
+    }
+    const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
+    return {&staging_buffer, offset};
+}
+
 bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
     const VAddr end_addr = addr + size;
     const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
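Note: ObtainTempBuffer gives the texture cache a readable GPU copy of guest memory: if the page table already maps the address to a cached buffer that fully covers the range, that buffer is reused; otherwise the data goes through the staging buffer. A toy model of the lookup (constants and names are illustrative, not the emulator's API):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

constexpr std::uint64_t kPageBits = 12;

struct Buffer {
    std::uint64_t base = 0;
    std::uint64_t size = 0;
    bool Contains(std::uint64_t addr, std::uint64_t len) const {
        return addr >= base && addr + len <= base + size;
    }
};

std::optional<std::size_t> FindCachedBuffer(const std::vector<std::size_t>& page_table,
                                            const std::vector<Buffer>& buffers,
                                            std::uint64_t addr, std::uint64_t len) {
    const std::size_t id = page_table[addr >> kPageBits];
    if (id != 0 && buffers[id].Contains(addr, len)) {
        return id; // reuse the already-uploaded GPU copy
    }
    return std::nullopt; // caller falls back to a staging copy
}

int main() {
    std::vector<Buffer> buffers{{}, {0x1000, 0x2000}}; // id 0 reserved as "null"
    std::vector<std::size_t> page_table(1 << 8, 0);
    page_table[0x1800 >> kPageBits] = 1;
    return FindCachedBuffer(page_table, buffers, 0x1800, 0x100).has_value() ? 0 : 1;
}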
@@ -248,6 +285,10 @@ bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
     return memory_tracker.IsRegionCpuModified(addr, size);
 }
 
+bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
+    return memory_tracker.IsRegionGpuModified(addr, size);
+}
+
 BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
     if (device_addr == 0) {
         return NULL_BUFFER_ID;
@@ -69,12 +69,18 @@ public:
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written);
 
+    /// Obtains a temporary buffer for usage in texture cache.
+    [[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
+
     /// Return true when a region is registered on the cache
     [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
 
     /// Return true when a CPU region is modified from the CPU
     [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
 
+    /// Return true when a CPU region is modified from the GPU
+    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -47,7 +47,7 @@ public:
     Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
                         VAddr cpu_address, bool is_eop) {
         const auto info = VideoCore::ImageInfo{attribute, cpu_address};
-        const auto image_id = texture_cache.FindImage(info, false);
+        const auto image_id = texture_cache.FindImage(info);
         auto& image = texture_cache.GetImage(image_id);
         return PrepareFrameInternal(image, is_eop);
     }
@@ -61,7 +61,7 @@ public:
         const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
         vo_buffers_addr.emplace_back(cpu_address);
         const auto info = VideoCore::ImageInfo{attribute, cpu_address};
-        const auto image_id = texture_cache.FindImage(info, false);
+        const auto image_id = texture_cache.FindImage(info);
         return texture_cache.GetImage(image_id);
     }
 
@@ -96,7 +96,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
     Shader::PushData push_data{};
     u32 binding{};
 
-    for (u32 i = 0; const auto& buffer : info.buffers) {
+    for (const auto& buffer : info.buffers) {
         const auto vsharp = buffer.GetVsharp(info);
         const VAddr address = vsharp.base_address;
         // Most of the time when a metadata is updated with a shader it gets cleared. It means we
@@ -115,7 +115,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
         }
         const u32 size = vsharp.GetSize();
         if (buffer.is_written) {
-            texture_cache.InvalidateMemory(address, size);
+            texture_cache.InvalidateMemory(address, size, true);
         }
         const u32 alignment =
             buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
@@ -137,7 +137,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                 : vk::DescriptorType::eUniformBuffer,
             .pBufferInfo = &buffer_infos.back(),
         });
-        i++;
     }
 
     for (const auto& image_desc : info.images) {
@@ -145,6 +145,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
         dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT);
         dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT);
     }
+    if (instance.IsVertexInputDynamicState()) {
+        dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
+    }

     const vk::PipelineDynamicStateCreateInfo dynamic_info = {
         .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
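With `eVertexInputEXT` in the dynamic state list, the vertex stride is no longer baked into the pipeline, which is what fixes games like Dark Souls rebinding vertex buffers with a different stride. A hedged sketch of the draw-time counterpart (placeholder binding/attribute values, not the emulator's actual setup code):

    // With VK_EXT_vertex_input_dynamic_state the stride is supplied while
    // recording, so a stride change is a command re-record, not a new PSO.
    const vk::VertexInputBindingDescription2EXT binding_desc = {
        .binding = 0,
        .stride = new_stride, // may legally differ between draws now
        .inputRate = vk::VertexInputRate::eVertex,
        .divisor = 1,
    };
    const vk::VertexInputAttributeDescription2EXT attribute_desc = {
        .location = 0,
        .binding = 0,
        .format = vk::Format::eR32G32B32Sfloat,
        .offset = 0,
    };
    cmdbuf.setVertexInputEXT(binding_desc, attribute_desc);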
@@ -202,6 +202,8 @@ bool Instance::CreateDevice() {
     add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
     workgroup_memory_explicit_layout =
         add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
+    vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+
     // The next two extensions are required to be available together in order to support write masks
     color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
     color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
@@ -319,6 +321,9 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceSynchronization2Features{
             .synchronization2 = true,
         },
+        vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
+            .vertexInputDynamicState = true,
+        },
     };

     if (!color_write_en) {
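The feature struct is appended to the device-creation structure chain optimistically; the next hunk strips it back out when the extension probe failed. A minimal sketch of that declare-then-unlink pattern in Vulkan-Hpp (types and names abbreviated, not the exact chain used here):

    vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>
        chain{
            vk::DeviceCreateInfo{},
            vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{.vertexInputDynamicState = true},
        };
    if (!vertex_input_dynamic_state) {
        // Extension unsupported: remove the struct so vkCreateDevice never sees it.
        chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
    }
    const auto device = physical_device.createDeviceUnique(chain.get<vk::DeviceCreateInfo>());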
@@ -331,8 +336,8 @@ bool Instance::CreateDevice() {
     } else {
         device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
     }
-    if (!has_sync2) {
-        device_chain.unlink<vk::PhysicalDeviceSynchronization2Features>();
+    if (!vertex_input_dynamic_state) {
+        device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
     }

     try {
@@ -132,6 +132,11 @@ public:
         return color_write_en;
     }

+    /// Returns true when VK_EXT_vertex_input_dynamic_state is supported.
+    bool IsVertexInputDynamicState() const {
+        return vertex_input_dynamic_state;
+    }
+
     /// Returns the vendor ID of the physical device
     u32 GetVendorID() const {
         return properties.vendorID;
@@ -257,6 +262,7 @@ private:
     bool external_memory_host{};
     bool workgroup_memory_explicit_layout{};
     bool color_write_en{};
+    bool vertex_input_dynamic_state{};
     u64 min_imported_host_pointer_alignment{};
     u32 subgroup_size{};
     bool tooling_info{};
@@ -209,6 +209,10 @@ void PipelineCache::RefreshGraphicsKey() {
             continue;
         }
         const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
+        if (!bininfo->Valid()) {
+            key.stage_hashes[i] = 0;
+            continue;
+        }
         key.stage_hashes[i] = bininfo->shader_hash;
     }
 }
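Zeroing the stage hash matters because the key is rebuilt in place across refreshes. The intent, as a one-line restatement (not code from the commit):

    // Without the explicit reset, a hash from a previously bound shader stage
    // would survive in the reused key and alias an unrelated pipeline.
    key.stage_hashes[i] = bininfo->Valid() ? bininfo->shader_hash : 0;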
@@ -117,6 +117,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     : instance{&instance_}, scheduler{&scheduler_}, info{info_},
       image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
       cpu_addr_end{cpu_addr + info.guest_size_bytes} {
+    mip_hashes.resize(info.resources.levels);
     ASSERT(info.pixel_format != vk::Format::eUndefined);
     // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
     // the texture cache should re-create the resource with the usage requested
@@ -111,6 +111,7 @@ struct Image {
     vk::Flags<vk::PipelineStageFlagBits> pl_stage = vk::PipelineStageFlagBits::eAllCommands;
     vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone;
     vk::ImageLayout layout = vk::ImageLayout::eUndefined;
+    boost::container::small_vector<u64, 14> mip_hashes;
 };

 } // namespace VideoCore
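One 64-bit hash is kept per mip level. The inline capacity of 14 presumably keeps common mip chains heap-free, since a power-of-two image has floor(log2(max_dim)) + 1 levels and an 8192-wide texture therefore has exactly 14. A quick check of that arithmetic (standalone illustration, not emulator code):

    #include <algorithm>
    #include <bit>
    #include <cstdint>

    // floor(log2(max_dim)) + 1 levels for a power-of-two image.
    constexpr std::uint32_t MipCount(std::uint32_t width, std::uint32_t height) {
        const std::uint32_t max_dim = std::max(width, height);
        return std::bit_width(max_dim); // == floor(log2(max_dim)) + 1 for non-zero dims
    }
    static_assert(MipCount(8192, 8192) == 14);   // fits the inline storage
    static_assert(MipCount(16384, 16384) == 15); // larger chains simply spill to the heap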
@@ -3,6 +3,7 @@

 #include <xxhash.h>
 #include "common/assert.h"
+#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -11,13 +12,11 @@

 namespace VideoCore {

-static constexpr u64 StreamBufferSize = 512_MB;
 static constexpr u64 PageShift = 12;

 TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                            BufferCache& buffer_cache_, PageManager& tracker_)
     : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
-      staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize},
       tile_manager{instance, scheduler} {
     ImageInfo info;
     info.pixel_format = vk::Format::eR8G8B8A8Unorm;
|
@ -31,9 +30,12 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
||||||
|
|
||||||
TextureCache::~TextureCache() = default;
|
TextureCache::~TextureCache() = default;
|
||||||
|
|
||||||
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
|
void TextureCache::InvalidateMemory(VAddr address, size_t size, bool from_compute) {
|
||||||
std::unique_lock lock{mutex};
|
std::unique_lock lock{mutex};
|
||||||
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
|
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
|
||||||
|
if (from_compute && !image.Overlaps(address, size)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
// Ensure image is reuploaded when accessed again.
|
// Ensure image is reuploaded when accessed again.
|
||||||
image.flags |= ImageFlagBits::CpuModified;
|
image.flags |= ImageFlagBits::CpuModified;
|
||||||
// Untrack image, so the range is unprotected and the guest can write freely.
|
// Untrack image, so the range is unprotected and the guest can write freely.
|
||||||
|
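`ForEachImageInRegion` matches images at page granularity, so the early-out above is what makes compute-triggered invalidation precise. A sketch of the byte-range test the call presumably relies on (the actual `Image::Overlaps` lives in the image header; this is the standard half-open interval check):

    // Two half-open ranges [cpu_addr, cpu_addr_end) and [addr, addr + size)
    // overlap iff each one starts before the other ends.
    bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const {
        const VAddr overlap_end = overlap_cpu_addr + overlap_size;
        return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
    }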
@@ -57,7 +59,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
     }
 }

-ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
+ImageId TextureCache::FindImage(const ImageInfo& info) {
     if (info.guest_address == 0) [[unlikely]] {
         return NULL_IMAGE_VIEW_ID;
     }
@@ -87,12 +89,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
         image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
     }

-    Image& image = slot_images[image_id];
-    if (True(image.flags & ImageFlagBits::CpuModified) && refresh_on_create) {
-        RefreshImage(image);
-        TrackImage(image, image_id);
-    }
-
     return image_id;
 }

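With the refresh block removed, `FindImage` becomes a pure lookup and the `refresh_on_create` flag disappears from every call site. Consumers now opt into the upload explicitly via the new `UpdateImage` helper (introduced later in this diff); a hedged sketch of the resulting caller pattern:

    // Lookup is side-effect free; only callers that will actually sample the
    // image pay for a reupload. Producers (render/depth targets) skip it.
    const ImageId image_id = texture_cache.FindImage(info); // no implicit upload
    texture_cache.UpdateImage(image_id); // RefreshImage + TrackImage iff CpuModified
    auto& image = texture_cache.GetImage(image_id);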
@@ -119,6 +115,7 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo

 ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
     const ImageId image_id = FindImage(info);
+    UpdateImage(image_id);
     Image& image = slot_images[image_id];
     auto& usage = image.info.usage;

@@ -165,7 +162,8 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,
                                           const ImageViewInfo& view_info) {
     const ImageId image_id = FindImage(image_info);
     Image& image = slot_images[image_id];
-    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags |= ImageFlagBits::GpuModified;
+    UpdateImage(image_id);

     image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
                   vk::AccessFlagBits::eColorAttachmentWrite |
@@ -198,8 +196,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info,

 ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
                                          const ImageViewInfo& view_info) {
-    const ImageId image_id = FindImage(image_info, false);
+    const ImageId image_id = FindImage(image_info);
     Image& image = slot_images[image_id];
+    image.flags |= ImageFlagBits::GpuModified;
     image.flags &= ~ImageFlagBits::CpuModified;

     const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal
@@ -228,22 +227,6 @@ void TextureCache::RefreshImage(Image& image) {
     // Mark image as validated.
     image.flags &= ~ImageFlagBits::CpuModified;

-    scheduler.EndRendering();
-
-    const auto cmdbuf = scheduler.CommandBuffer();
-    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite);
-
-    vk::Buffer buffer{staging.Handle()};
-    u32 offset{0};
-
-    auto upload_buffer = tile_manager.TryDetile(image);
-    if (upload_buffer) {
-        buffer = *upload_buffer;
-    } else {
-        // Upload data to the staging buffer.
-        offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16);
-    }
-
     const auto& num_layers = image.info.resources.layers;
     const auto& num_mips = image.info.resources.levels;
     ASSERT(num_mips == image.info.mips_layout.size());
@@ -254,12 +237,23 @@ void TextureCache::RefreshImage(Image& image) {
         const u32 height = std::max(image.info.size.height >> m, 1u);
         const u32 depth =
             image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
-        const auto& [_, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
+        const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];

+        // Protect GPU modified resources from accidental reuploads.
+        if (True(image.flags & ImageFlagBits::GpuModified) &&
+            !buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) {
+            const u8* addr = std::bit_cast<u8*>(image.info.guest_address);
+            const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size);
+            if (image.mip_hashes[m] == hash) {
+                continue;
+            }
+            image.mip_hashes[m] = hash;
+        }
+
         image_copy.push_back({
-            .bufferOffset = offset + mip_ofs * num_layers,
-            .bufferRowLength = static_cast<uint32_t>(mip_pitch),
-            .bufferImageHeight = static_cast<uint32_t>(mip_height),
+            .bufferOffset = mip_ofs * num_layers,
+            .bufferRowLength = static_cast<u32>(mip_pitch),
+            .bufferImageHeight = static_cast<u32>(mip_height),
             .imageSubresource{
                 .aspectMask = vk::ImageAspectFlagBits::eColor,
                 .mipLevel = m,
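The hash gate above is what protects GPU-modified images from reupload: when the guest bytes behind a mip are unchanged since the last upload, the copy for that mip is skipped entirely. A self-contained illustration of the skip test (hypothetical helper name, same XXH3 call as the commit):

    #include <xxhash.h>
    #include <cstddef>
    #include <cstdint>

    // Returns true when `data` differs from what `stored_hash` last saw,
    // updating the stored hash as a side effect. A hash collision would skip
    // a needed upload, but a 64-bit XXH3 collision is vanishingly unlikely.
    bool NeedsReupload(std::uint64_t& stored_hash, const void* data, std::size_t size) {
        const std::uint64_t hash = XXH3_64bits(data, size);
        if (hash == stored_hash) {
            return false; // bytes unchanged since the previous upload
        }
        stored_hash = hash;
        return true;
    }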
@@ -271,6 +265,30 @@ void TextureCache::RefreshImage(Image& image) {
         });
     }

+    if (image_copy.empty()) {
+        return;
+    }
+
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
+
+    const VAddr image_addr = image.info.guest_address;
+    const size_t image_size = image.info.guest_size_bytes;
+    vk::Buffer buffer{};
+    u32 offset{};
+    if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) {
+        buffer = *upload_buffer;
+    } else {
+        const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size);
+        buffer = vk_buffer->Handle();
+        offset = buf_offset;
+    }
+
+    for (auto& copy : image_copy) {
+        copy.bufferOffset += offset;
+    }
+
     cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
 }

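The upload is now two-phase: copy regions are recorded first with mip-relative offsets, and only once the source is known (detile output, or a temporary slice from the shared buffer cache that replaces the removed 512_MB dedicated stream buffer) are they rebased. If hashing filtered out every mip, the function returns before touching the scheduler, so no barrier or layout transition is recorded at all. The rebase step in isolation, with the reasoning as comments:

    // Phase 1 recorded bufferOffset = mip_ofs * num_layers for each kept mip.
    // Phase 2 rebases every copy once the real source buffer is known; the
    // offset is 0 for detiled uploads and the slice offset otherwise.
    for (auto& copy : image_copy) {
        copy.bufferOffset += offset;
    }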
@@ -38,13 +38,13 @@ public:
     ~TextureCache();

     /// Invalidates any image in the logical page range.
-    void InvalidateMemory(VAddr address, size_t size);
+    void InvalidateMemory(VAddr address, size_t size, bool from_compute = false);

     /// Evicts any images that overlap the unmapped range.
     void UnmapMemory(VAddr cpu_addr, size_t size);

     /// Retrieves the image handle of the image with the provided attributes.
-    [[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true);
+    [[nodiscard]] ImageId FindImage(const ImageInfo& info);

     /// Retrieves an image view with the properties of the specified image descriptor.
     [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,
@@ -58,6 +58,16 @@ public:
     [[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info,
                                              const ImageViewInfo& view_info);

+    /// Updates image contents if it was modified by CPU.
+    void UpdateImage(ImageId image_id) {
+        Image& image = slot_images[image_id];
+        if (False(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        RefreshImage(image);
+        TrackImage(image, image_id);
+    }
+
     /// Reuploads image contents.
     void RefreshImage(Image& image);

@@ -170,7 +180,6 @@ private:
     Vulkan::Scheduler& scheduler;
     BufferCache& buffer_cache;
     PageManager& tracker;
-    StreamBuffer staging;
     TileManager tile_manager;
     Common::SlotVector<Image> slot_images;
     Common::SlotVector<ImageView> slot_image_views;
@@ -5,7 +5,6 @@
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/texture_cache/image_view.h"
-#include "video_core/texture_cache/texture_cache.h"
 #include "video_core/texture_cache/tile_manager.h"

 #include "video_core/host_shaders/detile_m32x1_comp.h"