shader_recompiler: Vs outputs

parent b4e1eebcdd
commit 8103dde915
@@ -912,9 +912,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
+u64 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
     LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
-    return ORBIS_OK;
+    return 0x800000000;
 }
 
 int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
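A side note on the new return value (my observation, not part of the commit): 0x800000000 is a hex literal, so the stub now reports roughly 34.36 GHz. If the intent was the PS4's 800 MHz GPU core clock, a decimal literal (or 0x2FAF0800) would be expected:

    // Quick sanity check on the constant's magnitude:
    static_assert(0x800000000ULL == 34'359'738'368ULL); // what the stub returns
    static_assert(800'000'000ULL == 0x2FAF0800ULL);     // 800 MHz, the likely intent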
@@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp();
 int PS4_SYSV_ABI sceGnmGetEqEventType();
 int PS4_SYSV_ABI sceGnmGetEqTimeStamp();
 int PS4_SYSV_ABI sceGnmGetGpuBlockStatus();
-int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
+u64 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency();
 int PS4_SYSV_ABI sceGnmGetGpuInfoStatus();
 int PS4_SYSV_ABI sceGnmGetLastWaitedAddress();
 int PS4_SYSV_ABI sceGnmGetNumTcaUnits();
@@ -7,6 +7,37 @@
 namespace Shader::Backend::SPIRV {
 namespace {
 
+Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
+    switch (output) {
+    case VsOutput::ClipDist0:
+    case VsOutput::ClipDist1:
+    case VsOutput::ClipDist2:
+    case VsOutput::ClipDist3:
+    case VsOutput::ClipDist4:
+    case VsOutput::ClipDist5:
+    case VsOutput::ClipDist6:
+    case VsOutput::ClipDist7: {
+        const u32 index = u32(output) - u32(VsOutput::ClipDist0);
+        const Id clip_num{ctx.ConstU32(index)};
+        return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
+    }
+    case VsOutput::CullDist0:
+    case VsOutput::CullDist1:
+    case VsOutput::CullDist2:
+    case VsOutput::CullDist3:
+    case VsOutput::CullDist4:
+    case VsOutput::CullDist5:
+    case VsOutput::CullDist6:
+    case VsOutput::CullDist7: {
+        const u32 index = u32(output) - u32(VsOutput::CullDist0);
+        const Id cull_num{ctx.ConstU32(index)};
+        return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
+    }
+    default:
+        UNREACHABLE();
+    }
+}
+
 Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
     if (IR::IsParam(attr)) {
         const u32 index{u32(attr) - u32(IR::Attribute::Param0)};

@@ -20,6 +51,12 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
     switch (attr) {
     case IR::Attribute::Position0: {
         return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
+    case IR::Attribute::Position1:
+    case IR::Attribute::Position2:
+    case IR::Attribute::Position3: {
+        const u32 index = u32(attr) - u32(IR::Attribute::Position1);
+        return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]);
+    }
     case IR::Attribute::RenderTarget0:
     case IR::Attribute::RenderTarget1:
     case IR::Attribute::RenderTarget2:
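With this change, exports to Position1-3 stop being fixed builtins; each component is resolved through the per-shader VsOutput table that the pipeline cache builds from PA_CL_VS_OUT_CNTL (see the BuildVsOutputs hunk further down). A worked example of the lookup, with hypothetical table contents:

    // Suppose the shader exports to POS2, component .y:
    const IR::Attribute attr = IR::Attribute::Position2;
    const u32 element = 1;                                        // .y
    const u32 index = u32(attr) - u32(IR::Attribute::Position1);  // == 1
    // If vs_outputs[1][1] holds VsOutput::ClipDist1, VsOutputAttrPointer()
    // returns an OpAccessChain into the ClipDistance output array at index 1.
    const Shader::VsOutput out = ctx.info.vs_outputs[index][element];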
@@ -364,4 +364,12 @@ Id EmitFPIsNan64(EmitContext& ctx, Id value) {
     return ctx.OpIsNan(ctx.U1[1], value);
 }
 
+Id EmitFPIsInf32(EmitContext& ctx, Id value) {
+    return ctx.OpIsInf(ctx.U1[1], value);
+}
+
+Id EmitFPIsInf64(EmitContext& ctx, Id value) {
+    return ctx.OpIsInf(ctx.U1[1], value);
+}
+
 } // namespace Shader::Backend::SPIRV
@@ -140,7 +140,8 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
     const auto& texture = ctx.images[handle & 0xFFFF];
     const Id image = ctx.OpLoad(texture.image_type, texture.id);
-    ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), color);
+    const Id color_type = texture.data_types->Get(4);
+    ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), ctx.OpBitcast(color_type, color));
 }
 
 } // namespace Shader::Backend::SPIRV
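The image-write change fixes a type mismatch: OpImageWrite wants a texel whose component type matches the image's declared sample type, while the IR often carries the color as raw 32-bit words. Restated as a commented sketch (same calls as the hunk; `data_types->Get(4)` is assumed, per this diff, to yield the image's 4-component sample-type vector):

    const Id color_type = texture.data_types->Get(4);   // e.g. vec4 / ivec4 / uvec4
    const Id texel = ctx.OpBitcast(color_type, color);  // reinterprets bits, no numeric conversion
    ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), texel);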
@@ -241,6 +241,8 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitFPIsNan16(EmitContext& ctx, Id value);
 Id EmitFPIsNan32(EmitContext& ctx, Id value);
 Id EmitFPIsNan64(EmitContext& ctx, Id value);
+Id EmitFPIsInf32(EmitContext& ctx, Id value);
+Id EmitFPIsInf64(EmitContext& ctx, Id value);
 Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
 Id EmitISub32(EmitContext& ctx, Id a, Id b);
@@ -224,8 +224,17 @@ void EmitContext::DefineInputs(const Info& info) {
 
 void EmitContext::DefineOutputs(const Info& info) {
     switch (stage) {
-    case Stage::Vertex:
+    case Stage::Vertex: {
         output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
+        const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
+                                     f32_zero_value, f32_zero_value, f32_zero_value,
+                                     f32_zero_value, f32_zero_value};
+        const Id type{TypeArray(F32[1], ConstU32(8U))};
+        const Id initializer{ConstantComposite(type, zero)};
+        clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance,
+                                        spv::StorageClass::Output, initializer);
+        cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance,
+                                        spv::StorageClass::Output, initializer);
         for (u32 i = 0; i < IR::NumParams; i++) {
             const IR::Attribute param{IR::Attribute::Param0 + i};
             if (!info.stores.GetAny(param)) {

@@ -238,6 +247,7 @@ void EmitContext::DefineOutputs(const Info& info) {
             interfaces.push_back(id);
         }
         break;
+    }
     case Stage::Fragment:
         for (u32 i = 0; i < IR::NumRenderTargets; i++) {
             const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};

@@ -319,12 +329,20 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
     }
     if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
         image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
-        return spv::ImageFormat::Rg8Snorm;
+        return spv::ImageFormat::Rg8;
     }
     if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16_16_16 &&
         image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
         return spv::ImageFormat::Rgba16f;
     }
+    if (image.GetDataFmt() == AmdGpu::DataFormat::Format8 &&
+        image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
+        return spv::ImageFormat::R8;
+    }
+    if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 &&
+        image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
+        return spv::ImageFormat::Rgba8;
+    }
     UNREACHABLE();
 }
 
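Zero-initializing both distance arrays is deliberate: PA_CL_VS_OUT_CNTL may enable only some of the eight clip/cull planes, so array components the shader never writes would otherwise be undefined while still being part of the declared 8-element builtins. A distance of 0.0 is the neutral value; it neither clips nor culls. Roughly the GLSL-level effect, for intuition (illustration only):

    // What the vertex stage now declares, expressed as GLSL in a comment:
    //   out float gl_ClipDistance[8];  // all components start at 0.0
    //   out float gl_CullDistance[8];  // all components start at 0.0
    // 0.0 lies on the plane boundary, so unwritten planes reject nothing.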
@@ -66,15 +66,16 @@ public:
     }
 
     template <bool global = true>
-    [[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
+    [[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class,
+                               std::optional<Id> initializer = std::nullopt) {
         const Id pointer_type_id{TypePointer(storage_class, type)};
-        return global ? AddGlobalVariable(pointer_type_id, storage_class)
-                      : AddLocalVariable(pointer_type_id, storage_class);
+        return global ? AddGlobalVariable(pointer_type_id, storage_class, initializer)
+                      : AddLocalVariable(pointer_type_id, storage_class, initializer);
     }
 
     [[nodiscard]] Id DefineVariable(Id type, std::optional<spv::BuiltIn> builtin,
-                                    spv::StorageClass storage_class) {
-        const Id id{DefineVar(type, storage_class)};
+                                    spv::StorageClass storage_class, std::optional<Id> initializer = std::nullopt) {
+        const Id id{DefineVar(type, storage_class, initializer)};
         if (builtin) {
             Decorate(id, spv::Decoration::BuiltIn, *builtin);
         }

@@ -169,6 +170,8 @@ public:
     Id frag_depth{};
     std::array<Id, 8> frag_color{};
     std::array<u32, 8> frag_num_comp{};
+    Id clip_distances{};
+    Id cull_distances{};
 
     Id workgroup_id{};
     Id local_invocation_id{};
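The initializer threads through as a trailing std::optional, so existing call sites compile unchanged. A minimal usage sketch of both forms (mirroring the DefineOutputs hunk above; array_type/init_id stand for the values built there):

    // Unchanged form, no initializer:
    output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
    // New form, constant-initialized output (as used for clip/cull distances):
    clip_distances = DefineVariable(array_type, spv::BuiltIn::ClipDistance,
                                    spv::StorageClass::Output, init_id);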
@@ -377,6 +377,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
     case Opcode::IMAGE_SAMPLE_LZ:
     case Opcode::IMAGE_SAMPLE:
     case Opcode::IMAGE_SAMPLE_L:
+    case Opcode::IMAGE_SAMPLE_C_O:
         translator.IMAGE_SAMPLE(inst);
         break;
     case Opcode::IMAGE_ATOMIC_ADD:

@@ -490,6 +491,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
     case Opcode::V_CMP_NGT_F32:
         translator.V_CMP_F32(ConditionOp::LE, false, inst);
         break;
+    case Opcode::V_CMP_NGE_F32:
+        translator.V_CMP_F32(ConditionOp::LT, false, inst);
+        break;
     case Opcode::S_CMP_LT_U32:
         translator.S_CMP(ConditionOp::LT, false, inst);
         break;

@@ -811,6 +815,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
     case Opcode::V_CMP_NE_U64:
         translator.V_CMP_NE_U64(inst);
         break;
+    case Opcode::V_CMP_CLASS_F32:
+        translator.V_CMP_CLASS_F32(inst);
+        break;
     case Opcode::V_TRUNC_F32:
         translator.V_TRUNC_F32(inst);
         break;

@@ -827,6 +834,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         translator.S_ADD_U32(inst);
         break;
     case Opcode::S_SUB_U32:
+    case Opcode::S_SUB_I32:
         translator.S_SUB_U32(inst);
         break;
     // TODO: Separate implementation for legacy variants.
@@ -154,6 +154,7 @@ public:
     void V_BFREV_B32(const GcnInst& inst);
     void V_LDEXP_F32(const GcnInst& inst);
     void V_CVT_FLR_I32_F32(const GcnInst& inst);
+    void V_CMP_CLASS_F32(const GcnInst& inst);
 
     // Vector Memory
     void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
@@ -50,11 +50,14 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
     };
     const bool has_flt_source =
         is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
-    const IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
+    IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
     IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
     if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
         src1 = ir.BitCast<IR::F32, IR::U32>(src1);
     }
+    if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
+        src0 = ir.BitCast<IR::F32, IR::U32>(src0);
+    }
     const IR::Value result = ir.Select(flag, src1, src0);
     ir.SetVectorReg(dst_reg, IR::U32F32{result});
 }

@@ -513,4 +516,11 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
 }
 
+void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    ir.SetVcc(ir.Imm1(false));
+    // TODO
+}
+
 } // namespace Shader::Gcn
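V_CMP_CLASS_F32 is wired up but stubbed: VCC is forced to false and the class test is left as a TODO. The FPIsNan/FPIsInf plumbing added elsewhere in this commit looks like groundwork for it. A hypothetical fuller version, not code from the commit, assuming the GCN class-mask layout (bit 0 sNaN, bit 1 qNaN, bit 2 -inf, bit 9 +inf) and the usual IREmitter helpers (BitwiseAnd, INotEqual, LogicalAnd/LogicalOr):

    void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
        const IR::F32 src0{GetSrc(inst.src[0], true)};
        const IR::U32 mask{GetSrc(inst.src[1])};
        // Mask bits 0-1 ask about NaN (signaling or quiet); the IR cannot tell them apart.
        const IR::U1 want_nan{ir.INotEqual(ir.BitwiseAnd(mask, ir.Imm32(0x3U)), ir.Imm32(0U))};
        // Mask bits 2 and 9 ask about -inf / +inf.
        const IR::U1 want_inf{ir.INotEqual(ir.BitwiseAnd(mask, ir.Imm32(0x204U)), ir.Imm32(0U))};
        ir.SetVcc(ir.LogicalOr(ir.LogicalAnd(want_nan, ir.FPIsNan(src0)),
                               ir.LogicalAnd(want_inf, ir.FPIsInf(src0))));
    }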
@@ -831,6 +831,17 @@ U1 IREmitter::FPIsNan(const F32F64& value) {
     }
 }
 
+U1 IREmitter::FPIsInf(const F32F64& value) {
+    switch (value.Type()) {
+    case Type::F32:
+        return Inst<U1>(Opcode::FPIsInf32, value);
+    case Type::F64:
+        return Inst<U1>(Opcode::FPIsInf64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
 U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
     if (lhs.Type() != rhs.Type()) {
         UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
@@ -144,6 +144,7 @@ public:
     [[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
     [[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
     [[nodiscard]] U1 FPIsNan(const F32F64& value);
+    [[nodiscard]] U1 FPIsInf(const F32F64& value);
     [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
     [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
@@ -210,6 +210,8 @@ OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
 OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
 OPCODE(FPIsNan32, U1, F32, )
 OPCODE(FPIsNan64, U1, F64, )
+OPCODE(FPIsInf32, U1, F32, )
+OPCODE(FPIsInf64, U1, F64, )
 
 // Integer operations
 OPCODE(IAdd32, U32, U32, U32, )
@@ -42,6 +42,33 @@ enum class TextureType : u32 {
 };
 constexpr u32 NUM_TEXTURE_TYPES = 7;
 
+enum class VsOutput : u32 {
+    None,
+    PointSprite,
+    EdgeFlag,
+    KillFlag,
+    GsCutFlag,
+    GsMrtIndex,
+    GsVpIndex,
+    CullDist0,
+    CullDist1,
+    CullDist2,
+    CullDist3,
+    CullDist4,
+    CullDist5,
+    CullDist6,
+    CullDist7,
+    ClipDist0,
+    ClipDist1,
+    ClipDist2,
+    ClipDist3,
+    ClipDist4,
+    ClipDist5,
+    ClipDist6,
+    ClipDist7,
+};
+using VsOutputMap = std::array<VsOutput, 4>;
+
 struct BufferResource {
     u32 sgpr_base;
     u32 dword_offset;

@@ -123,6 +150,7 @@ struct Info {
     };
     AttributeFlags loads{};
    AttributeFlags stores{};
+    boost::container::static_vector<VsOutputMap, 3> vs_outputs;
 
     BufferResourceList buffers;
     ImageResourceList images;
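vs_outputs holds at most three 4-wide maps because the hardware exports at most three auxiliary position vectors besides POS0: the MISC vector (point size, edge/kill flags, indices) and the two clip/cull distance vectors; hence the static_vector capacity of 3. Illustrative contents for a shader exporting point size plus clip planes 0-5 (hypothetical values; the table is filled by BuildVsOutputs in the pipeline cache):

    // vs_outputs[0] (VS_OUT_MISC_VEC): { PointSprite, None,      None,      None      }
    // vs_outputs[1] (VS_OUT_CCDIST0):  { ClipDist0,   ClipDist1, ClipDist2, ClipDist3 }
    // vs_outputs[2] (VS_OUT_CCDIST1):  { ClipDist4,   ClipDist5, None,      None      }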
@@ -214,6 +214,10 @@ struct Liverpool {
         BitField<18, 1, u32> use_vtx_render_target_idx;
         BitField<19, 1, u32> use_vtx_viewport_idx;
         BitField<20, 1, u32> use_vtx_kill_flag;
+        BitField<21, 1, u32> vs_out_misc_enable;
+        BitField<22, 1, u32> vs_out_ccdist0_enable;
+        BitField<23, 1, u32> vs_out_ccdist1_enable;
+        BitField<25, 1, u32> use_vtx_gs_cut_flag;
 
         bool IsClipDistEnabled(u32 index) const {
             return (clip_distance_enable.Value() >> index) & 1;

@@ -1027,6 +1031,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
 static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
 static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
 static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
+static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
 static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
 static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
 static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
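For context, these bitfields extend the emulated PA_CL_VS_OUT_CNTL register. A summary of the field positions as comments (per the public GCN register documentation; treat this as my annotation, not part of the commit):

    // PA_CL_VS_OUT_CNTL:
    //   [7:0]   clip_distance_enable   per-plane user clip distances
    //   [15:8]  cull_distance_enable   per-plane cull distances
    //   16..20  use_vtx_point_size / edge_flag / render_target_idx / viewport_idx / kill_flag
    //   21      vs_out_misc_enable     export the MISC vector
    //   22      vs_out_ccdist0_enable  export clip/cull distances 0-3
    //   23      vs_out_ccdist1_enable  export clip/cull distances 4-7
    //   25      use_vtx_gs_cut_flag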
@@ -400,6 +400,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
         num_format == AmdGpu::NumberFormat::Uint) {
         return vk::Format::eR32G32Uint;
     }
+    if (data_format == AmdGpu::DataFormat::Format4_4_4_4 &&
+        num_format == AmdGpu::NumberFormat::Unorm) {
+        return vk::Format::eR4G4B4A4UnormPack16;
+    }
     UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
 }
@@ -85,8 +85,8 @@ ComputePipeline::~ComputePipeline() = default;
 bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
                                     VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
-    boost::container::static_vector<vk::DescriptorBufferInfo, 8> buffer_infos;
-    boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
+    boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
+    boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
     u32 binding{};
@@ -40,6 +40,7 @@ private:
     vk::UniquePipeline pipeline;
     vk::UniquePipelineLayout pipeline_layout;
     vk::UniqueDescriptorSetLayout desc_layout;
+    u64 compute_key;
     Shader::Info info{};
 };
@@ -18,6 +18,48 @@ extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
 
 namespace Vulkan {
 
+using Shader::VsOutput;
+
+void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
+    const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
+        if (x != VsOutput::None || y != VsOutput::None ||
+            z != VsOutput::None || w != VsOutput::None) {
+            info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
+        }
+    };
+    // VS_OUT_MISC_VEC
+    add_output(
+        ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
+        ctl.use_vtx_edge_flag ? VsOutput::EdgeFlag :
+            (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
+        ctl.use_vtx_kill_flag ? VsOutput::KillFlag :
+            (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
+        ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None
+    );
+    // VS_OUT_CCDIST0
+    add_output(
+        ctl.IsClipDistEnabled(0) ? VsOutput::ClipDist0 :
+            (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
+        ctl.IsClipDistEnabled(1) ? VsOutput::ClipDist1 :
+            (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
+        ctl.IsClipDistEnabled(2) ? VsOutput::ClipDist2 :
+            (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
+        ctl.IsClipDistEnabled(3) ? VsOutput::ClipDist3 :
+            (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)
+    );
+    // VS_OUT_CCDIST1
+    add_output(
+        ctl.IsClipDistEnabled(4) ? VsOutput::ClipDist4 :
+            (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
+        ctl.IsClipDistEnabled(5) ? VsOutput::ClipDist5 :
+            (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
+        ctl.IsClipDistEnabled(6) ? VsOutput::ClipDist6 :
+            (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
+        ctl.IsClipDistEnabled(7) ? VsOutput::ClipDist7 :
+            (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)
+    );
+}
+
 Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
                             const AmdGpu::Liverpool::Regs& regs) {
     Shader::Info info{};

@@ -26,6 +68,7 @@ Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_d
     switch (stage) {
     case Shader::Stage::Vertex: {
         info.num_user_data = regs.vs_program.settings.num_user_regs;
+        BuildVsOutputs(info, regs.vs_output_control);
         break;
     }
     case Shader::Stage::Fragment: {

@@ -171,13 +214,13 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
     // actual draw hence can skip pipeline creation.
     if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
         LOG_TRACE(Render_Vulkan, "FCE pass skipped");
-        return {};
+        //return {};
     }
 
     if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
         // TODO: check for a valid MRT1 to promote the draw to the resolve pass.
         LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
-        return {};
+        //return {};
     }
 
     u32 binding{};
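One subtlety in BuildVsOutputs: add_output() drops a vector whose four slots are all None, so vs_outputs stays dense. That appears to be what makes the `u32(attr) - u32(IR::Attribute::Position1)` indexing in OutputAttrPointer line up, since the hardware likewise packs only the enabled auxiliary vectors into POS1..POS3 in order (my reading, not stated in the commit). A worked decode with hypothetical register values:

    // ctl: use_vtx_point_size = 0, clip planes 0-1 enabled, everything else off.
    // add_output(MISC)    -> all four slots None, skipped
    // add_output(CCDIST0) -> vs_outputs[0] = { ClipDist0, ClipDist1, None, None }
    // add_output(CCDIST1) -> all four slots None, skipped
    // The shader exports CCDIST0 as POS1, so Position1 lookups hit vs_outputs[0].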
@@ -44,7 +44,11 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         return;
     }
 
-    pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+    try {
+        pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+    } catch (...) {
+        UNREACHABLE();
+    }
 
     BeginRendering();
     UpdateDynamicState(*pipeline);

@@ -70,9 +74,13 @@ void Rasterizer::DispatchDirect() {
         return;
     }
 
-    const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
-    if (!has_resources) {
-        return;
+    try {
+        const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
+        if (!has_resources) {
+            return;
+        }
+    } catch (...) {
+        UNREACHABLE();
     }
 
     scheduler.EndRendering();

@@ -129,6 +137,10 @@ void Rasterizer::BeginRendering() {
         };
         texture_cache.TouchMeta(htile_address, false);
         state.num_depth_attachments++;
+    } else {
+        if (regs.depth_render_control.depth_compress_disable) {
+            LOG_WARNING(Render_Vulkan, "No depth buffer bound with dcc");
+        }
     }
     scheduler.BeginRendering(state);
 }
@@ -221,6 +221,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     : instance{&instance_}, scheduler{&scheduler_}, info{info_},
       image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
       cpu_addr_end{cpu_addr + info.guest_size_bytes} {
+    if (cpu_addr == 2990538752ULL) {
+        printf("bad\n");
+    }
     ASSERT(info.pixel_format != vk::Format::eUndefined);
     vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
                                vk::ImageCreateFlagBits::eExtendedUsage};
@@ -77,7 +77,9 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
     if (usage_override) {
         usage_ci.usage = usage_override.value();
     }
+    if (info.format == vk::Format::eR32Sfloat) {
+        printf("stop\n");
+    }
     // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
     vk::Format format = info.format;
     vk::ImageAspectFlags aspect = image.aspect_mask;
@@ -127,14 +127,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address, bool r
         image_ids.push_back(image_id);
     });
 
-    ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
+    //ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!");
 
     ImageId image_id{};
     if (image_ids.empty()) {
         image_id = slot_images.insert(instance, scheduler, info, cpu_address);
         RegisterImage(image_id);
     } else {
-        image_id = image_ids[0];
+        image_id = image_ids[image_ids.size() > 1 ? 1 : 0];
     }
 
     RegisterMeta(info, image_id);