video_core: Preliminary storage image support and more (#188)

* vk_rasterizer: Clear depth buffer when DB_RENDER_CONTROL says so

* video_core: Preliminary storage image support, more opcodes

* renderer_vulkan: a fix for vertex buffers merging

* renderer_vulkan: a heuristic for blend override when alpha out is masked

---------

Co-authored-by: psucien <bad_cast@protonmail.com>
This commit is contained in:
TheTurtle 2024-06-10 22:35:14 +03:00 committed by GitHub
parent 23f11a3fda
commit 7b1a317b09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 429 additions and 101 deletions

View File

@ -322,6 +322,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/backend/spirv/emit_spirv_select.cpp src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
src/shader_recompiler/backend/spirv/emit_spirv_special.cpp src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
src/shader_recompiler/backend/spirv/spirv_emit_context.cpp src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
src/shader_recompiler/backend/spirv/spirv_emit_context.h src/shader_recompiler/backend/spirv/spirv_emit_context.h
src/shader_recompiler/frontend/translate/data_share.cpp src/shader_recompiler/frontend/translate/data_share.cpp

View File

@ -174,6 +174,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{}; spv::ExecutionModel execution_model{};
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
switch (program.info.stage) { switch (program.info.stage) {
case Stage::Compute: { case Stage::Compute: {
const std::array<u32, 3> workgroup_size{program.info.workgroup_size}; const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
@ -192,6 +193,10 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
} else { } else {
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
} }
if (program.info.uses_group_quad) {
ctx.AddCapability(spv::Capability::GroupNonUniform);
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
}
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
// if (program.info.stores_frag_depth) { // if (program.info.stores_frag_depth) {
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); // ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);

View File

@ -21,11 +21,20 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
case IR::Attribute::Position0: { case IR::Attribute::Position0: {
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element)); return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
case IR::Attribute::RenderTarget0: case IR::Attribute::RenderTarget0:
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[0], ctx.ConstU32(element)); case IR::Attribute::RenderTarget1:
case IR::Attribute::RenderTarget2:
case IR::Attribute::RenderTarget3: {
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
if (ctx.frag_num_comp[index] > 1) {
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
} else {
return ctx.frag_color[index];
}
} }
default: default:
throw NotImplementedException("Read attribute {}", attr); throw NotImplementedException("Read attribute {}", attr);
} }
}
} }
} // Anonymous namespace } // Anonymous namespace
@ -152,7 +161,15 @@ Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
} }
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
UNREACHABLE(); const auto info = inst->Flags<IR::BufferInstInfo>();
const auto& buffer = ctx.buffers[handle];
boost::container::static_vector<Id, 3> ids;
for (u32 i = 0; i < 3; i++) {
const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
}
return ctx.OpCompositeConstruct(buffer.data_types->Get(3), ids);
} }
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {

View File

@ -50,9 +50,11 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
throw NotImplementedException("SPIR-V Instruction"); throw NotImplementedException("SPIR-V Instruction");
} }
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id lod, Id ms) { Id ms) {
throw NotImplementedException("SPIR-V Instruction"); const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
return ctx.OpImageFetch(ctx.F32[4], image, coords, spv::ImageOperandsMask::Lod, lod);
} }
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
@ -73,8 +75,10 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
throw NotImplementedException("SPIR-V Instruction"); throw NotImplementedException("SPIR-V Instruction");
} }
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) {
throw NotImplementedException("SPIR-V Instruction"); const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
ctx.OpImageWrite(image, ctx.OpBitcast(ctx.S32[2], coords), color);
} }
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -344,14 +344,17 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
const IR::Value& offset, const IR::Value& offset2); const IR::Value& offset, const IR::Value& offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref); const IR::Value& offset, const IR::Value& offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id offset, Id lod,
Id lod, Id ms); Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
const IR::Value& skip_mips); const IR::Value& skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id derivatives, const IR::Value& offset, Id lod_clamp); Id derivatives, const IR::Value& offset, Id lod_clamp);
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
Id EmitLaneId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -0,0 +1,21 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
Id SubgroupScope(EmitContext& ctx) {
return ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
}
Id EmitLaneId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
}
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index) {
return ctx.OpGroupNonUniformQuadBroadcast(ctx.U32[1], SubgroupScope(ctx), value, index);
}
} // namespace Shader::Backend::SPIRV

View File

@ -178,6 +178,11 @@ void EmitContext::DefineInputs(const Info& info) {
} }
break; break;
case Stage::Fragment: case Stage::Fragment:
if (info.uses_group_quad) {
subgroup_local_invocation_id = DefineVariable(
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
for (const auto& input : info.ps_inputs) { for (const auto& input : info.ps_inputs) {
@ -231,7 +236,9 @@ void EmitContext::DefineOutputs(const Info& info) {
if (!info.stores.GetAny(mrt)) { if (!info.stores.GetAny(mrt)) {
continue; continue;
} }
frag_color[i] = DefineOutput(F32[4], i); const u32 num_components = info.stores.NumComponents(mrt);
frag_color[i] = DefineOutput(F32[num_components], i);
frag_num_comp[i] = num_components;
Name(frag_color[i], fmt::format("frag_color{}", i)); Name(frag_color[i], fmt::format("frag_color{}", i));
interfaces.push_back(frag_color[i]); interfaces.push_back(frag_color[i]);
} }
@ -277,54 +284,22 @@ void EmitContext::DefineBuffers(const Info& info) {
} }
} }
Id ImageType(EmitContext& ctx, const ImageResource& desc) {
const spv::ImageFormat format{spv::ImageFormat::Unknown};
const Id type{ctx.F32[1]};
const bool depth{desc.is_depth};
switch (desc.type) {
case AmdGpu::ImageType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format,
spv::AccessQualifier::ReadOnly);
case AmdGpu::ImageType::Color1DArray:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format,
spv::AccessQualifier::ReadOnly);
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Color2DMsaa:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false,
desc.type == AmdGpu::ImageType::Color2DMsaa, 1, format,
spv::AccessQualifier::ReadOnly);
case AmdGpu::ImageType::Color2DArray:
case AmdGpu::ImageType::Color2DMsaaArray:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true,
desc.type == AmdGpu::ImageType::Color2DMsaaArray, 1, format,
spv::AccessQualifier::ReadOnly);
case AmdGpu::ImageType::Color3D:
return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format,
spv::AccessQualifier::ReadOnly);
case AmdGpu::ImageType::Cube:
return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format,
spv::AccessQualifier::ReadOnly);
case AmdGpu::ImageType::Buffer:
break;
}
throw InvalidArgument("Invalid texture type {}", desc.type);
}
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
const auto format = spv::ImageFormat::Unknown; // Read this from tsharp? const auto format = spv::ImageFormat::Unknown;
const u32 sampled = desc.is_storage ? 2 : 1;
switch (desc.type) { switch (desc.type) {
case AmdGpu::ImageType::Color1D: case AmdGpu::ImageType::Color1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 1, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Color1DArray:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 1, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, sampled, format);
case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color2D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 1, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, sampled, format);
case AmdGpu::ImageType::Color2DArray: case AmdGpu::ImageType::Color2DArray:
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 1, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, sampled, format);
case AmdGpu::ImageType::Color3D: case AmdGpu::ImageType::Color3D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, 1, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube: case AmdGpu::ImageType::Cube:
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, 1, format); return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format);
case AmdGpu::ImageType::Buffer: case AmdGpu::ImageType::Buffer:
throw NotImplementedException("Image buffer"); throw NotImplementedException("Image buffer");
default: default:
@ -345,7 +320,7 @@ void EmitContext::DefineImagesAndSamplers(const Info& info) {
image_desc.dword_offset)); image_desc.dword_offset));
images.push_back({ images.push_back({
.id = id, .id = id,
.sampled_type = TypeSampledImage(image_type), .sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type, .pointer_type = pointer_type,
.image_type = image_type, .image_type = image_type,
}); });

View File

@ -158,9 +158,11 @@ public:
Id frag_coord{}; Id frag_coord{};
Id front_facing{}; Id front_facing{};
std::array<Id, 8> frag_color{}; std::array<Id, 8> frag_color{};
std::array<u32, 8> frag_num_comp{};
Id workgroup_id{}; Id workgroup_id{};
Id local_invocation_id{}; Id local_invocation_id{};
Id subgroup_local_invocation_id{};
struct TextureDefinition { struct TextureDefinition {
Id id; Id id;

View File

@ -5,6 +5,19 @@
namespace Shader::Gcn { namespace Shader::Gcn {
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
const u8 offset0 = inst.control.ds.offset0;
const u8 offset1 = inst.control.ds.offset1;
const IR::U32 src{GetSrc(inst.src[1])};
ASSERT(offset1 & 0x80);
const IR::U32 lane_id = ir.LaneId();
const IR::U32 id_in_group = ir.BitwiseAnd(lane_id, ir.Imm32(0b11));
const IR::U32 base = ir.ShiftLeftLogical(id_in_group, ir.Imm32(1));
const IR::U32 index =
ir.IAdd(lane_id, ir.BitFieldExtract(ir.Imm32(offset0), base, ir.Imm32(2)));
SetDst(inst.dst[0], ir.QuadShuffle(src, index));
}
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) { void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) {
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::VectorReg dst_reg{inst.dst[0].code};

View File

@ -75,9 +75,17 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
// SGPR we have a special IR opcode for SPGRs that act as thread masks. // SGPR we have a special IR opcode for SPGRs that act as thread masks.
ASSERT(inst.src[0].field == OperandField::VccLo);
const IR::U1 exec{ir.GetExec()}; const IR::U1 exec{ir.GetExec()};
const IR::U1 vcc{ir.GetVcc()}; const IR::U1 src = [&] {
switch (inst.src[0].field) {
case OperandField::VccLo:
return ir.GetVcc();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
default:
UNREACHABLE();
}
}();
// Mark destination SPGR as an EXEC context. This means we will use 1-bit // Mark destination SPGR as an EXEC context. This means we will use 1-bit
// IR instruction whenever it's loaded. // IR instruction whenever it's loaded.
@ -96,7 +104,7 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
} }
// Update EXEC. // Update EXEC.
ir.SetExec(ir.LogicalAnd(exec, vcc)); ir.SetExec(ir.LogicalAnd(exec, src));
} }
void Translator::S_MOV_B64(const GcnInst& inst) { void Translator::S_MOV_B64(const GcnInst& inst) {
@ -258,4 +266,11 @@ void Translator::S_LSHL_B32(const GcnInst& inst) {
ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
} }
void Translator::S_BFM_B32(const GcnInst& inst) {
const IR::U32 src0{ir.BitwiseAnd(GetSrc(inst.src[0]), ir.Imm32(0x1F))};
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
const IR::U32 mask{ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1u), src0), ir.Imm32(1))};
SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1));
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -306,6 +306,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_SAMPLE: case Opcode::IMAGE_SAMPLE:
translator.IMAGE_SAMPLE(inst); translator.IMAGE_SAMPLE(inst);
break; break;
case Opcode::IMAGE_STORE:
translator.IMAGE_STORE(inst);
break;
case Opcode::IMAGE_LOAD_MIP:
translator.IMAGE_LOAD_MIP(inst);
break;
case Opcode::V_CMP_GE_I32:
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
break;
case Opcode::V_CMP_EQ_I32: case Opcode::V_CMP_EQ_I32:
translator.V_CMP_U32(ConditionOp::EQ, true, false, inst); translator.V_CMP_U32(ConditionOp::EQ, true, false, inst);
break; break;
@ -331,28 +340,31 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.V_CMP_U32(ConditionOp::TRU, false, false, inst); translator.V_CMP_U32(ConditionOp::TRU, false, false, inst);
break; break;
case Opcode::V_CMP_NEQ_F32: case Opcode::V_CMP_NEQ_F32:
translator.V_CMP_F32(ConditionOp::LG, inst); translator.V_CMP_F32(ConditionOp::LG, false, inst);
break; break;
case Opcode::V_CMP_F_F32: case Opcode::V_CMP_F_F32:
translator.V_CMP_F32(ConditionOp::F, inst); translator.V_CMP_F32(ConditionOp::F, false, inst);
break; break;
case Opcode::V_CMP_LT_F32: case Opcode::V_CMP_LT_F32:
translator.V_CMP_F32(ConditionOp::LT, inst); translator.V_CMP_F32(ConditionOp::LT, false, inst);
break; break;
case Opcode::V_CMP_EQ_F32: case Opcode::V_CMP_EQ_F32:
translator.V_CMP_F32(ConditionOp::EQ, inst); translator.V_CMP_F32(ConditionOp::EQ, false, inst);
break; break;
case Opcode::V_CMP_LE_F32: case Opcode::V_CMP_LE_F32:
translator.V_CMP_F32(ConditionOp::LE, inst); translator.V_CMP_F32(ConditionOp::LE, false, inst);
break; break;
case Opcode::V_CMP_GT_F32: case Opcode::V_CMP_GT_F32:
translator.V_CMP_F32(ConditionOp::GT, inst); translator.V_CMP_F32(ConditionOp::GT, false, inst);
break; break;
case Opcode::V_CMP_LG_F32: case Opcode::V_CMP_LG_F32:
translator.V_CMP_F32(ConditionOp::LG, inst); translator.V_CMP_F32(ConditionOp::LG, false, inst);
break; break;
case Opcode::V_CMP_GE_F32: case Opcode::V_CMP_GE_F32:
translator.V_CMP_F32(ConditionOp::GE, inst); translator.V_CMP_F32(ConditionOp::GE, false, inst);
break;
case Opcode::V_CMP_NLE_F32:
translator.V_CMP_F32(ConditionOp::GT, false, inst);
break; break;
case Opcode::S_CMP_LG_U32: case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst); translator.S_CMP(ConditionOp::LG, false, inst);
@ -378,6 +390,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CNDMASK_B32: case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst); translator.V_CNDMASK_B32(inst);
break; break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZW: case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
translator.BUFFER_LOAD_FORMAT(4, true, inst); translator.BUFFER_LOAD_FORMAT(4, true, inst);
break; break;
@ -414,6 +429,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_MIN_F32: case Opcode::V_MIN_F32:
translator.V_MIN_F32(inst); translator.V_MIN_F32(inst);
break; break;
case Opcode::V_MIN_I32:
translator.V_MIN_I32(inst);
break;
case Opcode::V_MIN3_F32: case Opcode::V_MIN3_F32:
translator.V_MIN3_F32(inst); translator.V_MIN3_F32(inst);
break; break;
@ -435,6 +453,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CVT_U32_F32: case Opcode::V_CVT_U32_F32:
translator.V_CVT_U32_F32(inst); translator.V_CVT_U32_F32(inst);
break; break;
case Opcode::V_CVT_I32_F32:
translator.V_CVT_I32_F32(inst);
break;
case Opcode::V_SUBREV_F32: case Opcode::V_SUBREV_F32:
translator.V_SUBREV_F32(inst); translator.V_SUBREV_F32(inst);
break; break;
@ -447,12 +468,61 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_SUBREV_I32: case Opcode::V_SUBREV_I32:
translator.V_SUBREV_I32(inst); translator.V_SUBREV_I32(inst);
break; break;
case Opcode::V_CMPX_F_F32:
translator.V_CMP_F32(ConditionOp::F, true, inst);
break;
case Opcode::V_CMPX_LT_F32:
translator.V_CMP_F32(ConditionOp::LT, true, inst);
break;
case Opcode::V_CMPX_EQ_F32:
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
break;
case Opcode::V_CMPX_LE_F32:
translator.V_CMP_F32(ConditionOp::LE, true, inst);
break;
case Opcode::V_CMPX_GT_F32:
translator.V_CMP_F32(ConditionOp::GT, true, inst);
break;
case Opcode::V_CMPX_LG_F32:
translator.V_CMP_F32(ConditionOp::LG, true, inst);
break;
case Opcode::V_CMPX_GE_F32:
translator.V_CMP_F32(ConditionOp::GE, true, inst);
break;
case Opcode::V_CMPX_NGE_F32:
translator.V_CMP_F32(ConditionOp::LT, true, inst);
break;
case Opcode::V_CMPX_NLG_F32:
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
break;
case Opcode::V_CMPX_NGT_F32:
translator.V_CMP_F32(ConditionOp::LE, true, inst);
break;
case Opcode::V_CMPX_NLE_F32:
translator.V_CMP_F32(ConditionOp::GT, true, inst);
break;
case Opcode::V_CMPX_NEQ_F32:
translator.V_CMP_F32(ConditionOp::LG, true, inst);
break;
case Opcode::V_CMPX_NLT_F32:
translator.V_CMP_F32(ConditionOp::GE, true, inst);
break;
case Opcode::V_CMPX_TRU_F32:
translator.V_CMP_F32(ConditionOp::TRU, true, inst);
break;
case Opcode::V_CMP_LE_U32: case Opcode::V_CMP_LE_U32:
translator.V_CMP_U32(ConditionOp::LE, false, false, inst); translator.V_CMP_U32(ConditionOp::LE, false, false, inst);
break; break;
case Opcode::V_CMP_GT_I32: case Opcode::V_CMP_GT_I32:
translator.V_CMP_U32(ConditionOp::GT, true, false, inst); translator.V_CMP_U32(ConditionOp::GT, true, false, inst);
break; break;
case Opcode::V_CMP_LT_I32:
translator.V_CMP_U32(ConditionOp::LT, true, false, inst);
break;
case Opcode::V_CMPX_LT_I32:
translator.V_CMP_U32(ConditionOp::LT, true, true, inst);
break;
case Opcode::V_CMPX_F_U32: case Opcode::V_CMPX_F_U32:
translator.V_CMP_U32(ConditionOp::F, false, true, inst); translator.V_CMP_U32(ConditionOp::F, false, true, inst);
break; break;
@ -540,6 +610,18 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_BCNT_U32_B32: case Opcode::V_BCNT_U32_B32:
translator.V_BCNT_U32_B32(inst); translator.V_BCNT_U32_B32(inst);
break; break;
case Opcode::V_MAX3_F32:
translator.V_MAX3_F32(inst);
break;
case Opcode::DS_SWIZZLE_B32:
translator.DS_SWIZZLE_B32(inst);
break;
case Opcode::V_MUL_LO_U32:
translator.V_MUL_LO_U32(inst);
break;
case Opcode::S_BFM_B32:
translator.S_BFM_B32(inst);
break;
case Opcode::S_NOP: case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ: case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0: case Opcode::S_CBRANCH_SCC0:

View File

@ -49,6 +49,7 @@ public:
void S_CSELECT_B64(const GcnInst& inst); void S_CSELECT_B64(const GcnInst& inst);
void S_BFE_U32(const GcnInst& inst); void S_BFE_U32(const GcnInst& inst);
void S_LSHL_B32(const GcnInst& inst); void S_LSHL_B32(const GcnInst& inst);
void S_BFM_B32(const GcnInst& inst);
// Scalar Memory // Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -75,7 +76,7 @@ public:
void V_SUB_F32(const GcnInst& inst); void V_SUB_F32(const GcnInst& inst);
void V_RCP_F32(const GcnInst& inst); void V_RCP_F32(const GcnInst& inst);
void V_FMA_F32(const GcnInst& inst); void V_FMA_F32(const GcnInst& inst);
void V_CMP_F32(ConditionOp op, const GcnInst& inst); void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst); void V_MAX_F32(const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst); void V_RSQ_F32(const GcnInst& inst);
void V_SIN_F32(const GcnInst& inst); void V_SIN_F32(const GcnInst& inst);
@ -106,6 +107,10 @@ public:
void V_RNDNE_F32(const GcnInst& inst); void V_RNDNE_F32(const GcnInst& inst);
void V_BCNT_U32_B32(const GcnInst& inst); void V_BCNT_U32_B32(const GcnInst& inst);
void V_COS_F32(const GcnInst& inst); void V_COS_F32(const GcnInst& inst);
void V_MAX3_F32(const GcnInst& inst);
void V_CVT_I32_F32(const GcnInst& inst);
void V_MIN_I32(const GcnInst& inst);
void V_MUL_LO_U32(const GcnInst& inst);
// Vector Memory // Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
@ -115,12 +120,15 @@ public:
void V_INTERP_P2_F32(const GcnInst& inst); void V_INTERP_P2_F32(const GcnInst& inst);
// Data share // Data share
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
// MIMG // MIMG
void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_GET_RESINFO(const GcnInst& inst);
void IMAGE_SAMPLE(const GcnInst& inst); void IMAGE_SAMPLE(const GcnInst& inst);
void IMAGE_STORE(const GcnInst& inst);
void IMAGE_LOAD_MIP(const GcnInst& inst);
// Export // Export
void EXP(const GcnInst& inst); void EXP(const GcnInst& inst);

View File

@ -20,7 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) {
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code}; const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); const IR::Value vec_f32 =
ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true));
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
} }
@ -143,7 +144,7 @@ void Translator::V_FMA_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2)); SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
} }
void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) { void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)}; const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)}; const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::U1 result = [&] { const IR::U1 result = [&] {
@ -166,6 +167,9 @@ void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
UNREACHABLE(); UNREACHABLE();
} }
}(); }();
if (set_exec) {
ir.SetExec(result);
}
switch (inst.dst[1].field) { switch (inst.dst[1].field) {
case OperandField::VccLo: case OperandField::VccLo:
@ -382,4 +386,28 @@ void Translator::V_COS_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPCos(src0)); SetDst(inst.dst[0], ir.FPCos(src0));
} }
void Translator::V_MAX3_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2], true)};
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
}
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
}
void Translator::V_MIN_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.SMin(src0, src1));
}
void Translator::V_MUL_LO_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IMul(src0, src1));
}
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -107,6 +107,48 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
} }
} }
void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg dest_reg{inst.dst[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
IR::TextureInstInfo info{};
info.explicit_lod.Assign(1);
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
continue;
}
IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
ir.SetVectorReg(dest_reg++, value);
}
}
void Translator::IMAGE_STORE(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg addr_reg{inst.src[0].code};
IR::VectorReg data_reg{inst.dst[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
ASSERT(mimg.dmask == 0xF);
const IR::Value value = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(data_reg), ir.GetVectorReg<IR::F32>(data_reg + 1),
ir.GetVectorReg<IR::F32>(data_reg + 2), ir.GetVectorReg<IR::F32>(data_reg + 3));
ir.ImageWrite(handle, body, value, {});
}
void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) { void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) {
const auto& mtbuf = inst.control.mtbuf; const auto& mtbuf = inst.control.mtbuf;
const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vaddr{inst.src[0].code};

View File

@ -318,6 +318,14 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
} }
} }
U32 IREmitter::LaneId() {
return Inst<U32>(Opcode::LaneId);
}
U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
return Inst<U32>(Opcode::QuadShuffle, value, index);
}
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
if (a.Type() != b.Type()) { if (a.Type() != b.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());

View File

@ -85,12 +85,8 @@ public:
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
BufferInstInfo info); BufferInstInfo info);
[[nodiscard]] U1 GetZeroFromOp(const Value& op); [[nodiscard]] U32 LaneId();
[[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
[[nodiscard]] U1 GetCarryFromOp(const Value& op);
[[nodiscard]] U1 GetOverflowFromOp(const Value& op);
[[nodiscard]] U1 GetSparseFromOp(const Value& op);
[[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);

View File

@ -52,6 +52,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferF32x3: case Opcode::StoreBufferF32x3:
case Opcode::StoreBufferF32x4: case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferU32: case Opcode::StoreBufferU32:
case Opcode::ImageWrite:
return true; return true;
default: default:
return false; return false;

View File

@ -269,3 +269,7 @@ OPCODE(ImageQueryLod, F32x4, Opaq
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, ) OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 )

View File

@ -93,6 +93,16 @@ bool IsImageInstruction(const IR::Inst& inst) {
} }
} }
bool IsImageStorageInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageWrite:
case IR::Opcode::ImageRead:
return true;
default:
return false;
}
}
class Descriptors { class Descriptors {
public: public:
explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_, explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_,
@ -241,32 +251,42 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
IR::Inst* producer = inst.Arg(0).InstRecursive(); IR::Inst* producer = inst.Arg(0).InstRecursive();
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
producer->GetOpcode() == IR::Opcode::GetUserData);
const auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<IR::Inst*, IR::Inst*> {
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
return std::make_pair(producer->Arg(0).InstRecursive(),
producer->Arg(1).InstRecursive());
}
return std::make_pair(producer, nullptr);
}();
// Read image sharp. // Read image sharp.
const auto tsharp = TrackSharp(producer->Arg(0).InstRecursive()); const auto tsharp = TrackSharp(tsharp_handle);
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset); const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const u32 image_binding = descriptors.Add(ImageResource{ u32 image_binding = descriptors.Add(ImageResource{
.sgpr_base = tsharp.sgpr_base, .sgpr_base = tsharp.sgpr_base,
.dword_offset = tsharp.dword_offset, .dword_offset = tsharp.dword_offset,
.type = image.type, .type = image.type,
.nfmt = static_cast<AmdGpu::NumberFormat>(image.num_format.Value()), .nfmt = static_cast<AmdGpu::NumberFormat>(image.num_format.Value()),
.is_storage = false, .is_storage = IsImageStorageInstruction(inst),
.is_depth = bool(inst_info.is_depth), .is_depth = bool(inst_info.is_depth),
}); });
// Read sampler sharp. // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
const auto ssharp = TrackSharp(producer->Arg(1).InstRecursive()); if (ssharp_handle) {
const auto ssharp = TrackSharp(ssharp_handle);
const u32 sampler_binding = descriptors.Add(SamplerResource{ const u32 sampler_binding = descriptors.Add(SamplerResource{
.sgpr_base = ssharp.sgpr_base, .sgpr_base = ssharp.sgpr_base,
.dword_offset = ssharp.dword_offset, .dword_offset = ssharp.dword_offset,
}); });
image_binding |= (sampler_binding << 16);
}
// Patch image handle // Patch image handle
const u32 handle = image_binding | (sampler_binding << 16);
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(handle)); inst.SetArg(0, ir.Imm32(image_binding));
// Now that we know the image type, adjust texture coordinate vector. // Now that we know the image type, adjust texture coordinate vector.
const IR::Inst* body = inst.Arg(1).InstRecursive(); const IR::Inst* body = inst.Arg(1).InstRecursive();
@ -283,7 +303,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
case AmdGpu::ImageType::Cube: case AmdGpu::ImageType::Cube:
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
default: default:
UNREACHABLE(); UNREACHABLE_MSG("Unknown image type {}", image.type.Value());
} }
}(); }();
inst.SetArg(1, coords); inst.SetArg(1, coords);
@ -293,6 +313,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
const u32 arg_pos = inst_info.is_depth ? 5 : 4; const u32 arg_pos = inst_info.is_depth ? 5 : 4;
inst.SetArg(arg_pos, arg); inst.SetArg(arg_pos, arg);
} }
if (inst_info.explicit_lod && inst.GetOpcode() == IR::Opcode::ImageFetch) {
inst.SetArg(3, arg);
}
} }
void ResourceTrackingPass(IR::Program& program) { void ResourceTrackingPass(IR::Program& program) {

View File

@ -16,6 +16,9 @@ void Visit(Info& info, IR::Inst& inst) {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break; break;
} }
case IR::Opcode::QuadShuffle:
info.uses_group_quad = true;
break;
default: default:
break; break;
} }

View File

@ -126,6 +126,8 @@ struct Info {
std::span<const u32> user_data; std::span<const u32> user_data;
Stage stage; Stage stage;
bool uses_group_quad{};
template <typename T> template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
T data; T data;

View File

@ -420,6 +420,13 @@ struct Liverpool {
}; };
union ColorBufferMask { union ColorBufferMask {
enum ColorComponent : u32 {
ComponentR = (1u << 0),
ComponentG = (1u << 1),
ComponentB = (1u << 2),
ComponentA = (1u << 3),
};
u32 raw; u32 raw;
BitField<0, 4, u32> output0_mask; BitField<0, 4, u32> output0_mask;
BitField<4, 4, u32> output1_mask; BitField<4, 4, u32> output1_mask;
@ -430,8 +437,8 @@ struct Liverpool {
BitField<24, 4, u32> output6_mask; BitField<24, 4, u32> output6_mask;
BitField<28, 4, u32> output7_mask; BitField<28, 4, u32> output7_mask;
[[nodiscard]] u8 GetMask(int buf_id) const { u32 GetMask(int buf_id) const {
return (raw >> (buf_id * 4)) & 0xffu; return (raw >> (buf_id * 4)) & 0xfu;
} }
}; };
@ -732,6 +739,20 @@ struct Liverpool {
float back_offset; float back_offset;
}; };
struct Address {
u32 address;
VAddr GetAddress() const {
return u64(address) << 8;
}
};
union DepthRenderControl {
u32 raw;
BitField<0, 1, u32> depth_clear_enable;
BitField<1, 1, u32> stencil_clear_enable;
};
union Regs { union Regs {
struct { struct {
INSERT_PADDING_WORDS(0x2C08); INSERT_PADDING_WORDS(0x2C08);
@ -740,11 +761,15 @@ struct Liverpool {
ShaderProgram vs_program; ShaderProgram vs_program;
INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16); INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16);
ComputeProgram cs_program; ComputeProgram cs_program;
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80); INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(4);
Address depth_htile_data_base;
INSERT_PADDING_WORDS(2);
float depth_bounds_min; float depth_bounds_min;
float depth_bounds_max; float depth_bounds_max;
u32 stencil_clear; u32 stencil_clear;
u32 depth_clear; float depth_clear;
Scissor screen_scissor; Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2); INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
DepthBuffer depth_buffer; DepthBuffer depth_buffer;
@ -925,6 +950,8 @@ static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C); static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40); static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
@ -942,6 +969,7 @@ static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0); static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9); static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC); static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);

View File

@ -334,6 +334,19 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Float) { if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32Sfloat; return vk::Format::eR32Sfloat;
} }
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR16G16B16A16Sfloat;
}
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32Uint;
}
if (data_format == AmdGpu::DataFormat::Format32 && num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR32Sint;
}
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8Unorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
} }

View File

@ -111,14 +111,15 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
for (const auto& image : info.images) { for (const auto& image : info.images) {
const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset); const auto tsharp = info.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
const auto& image_view = texture_cache.FindImageView(tsharp); const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral); image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, vk::ImageLayout::eGeneral);
set_writes.push_back({ set_writes.push_back({
.dstSet = VK_NULL_HANDLE, .dstSet = VK_NULL_HANDLE,
.dstBinding = binding++, .dstBinding = binding++,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(), .pImageInfo = &image_infos.back(),
}); });
} }

View File

@ -196,7 +196,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor); const auto dst_color = LiverpoolToVK::BlendFactor(control.color_dst_factor);
const auto color_blend = LiverpoolToVK::BlendOp(control.color_func); const auto color_blend = LiverpoolToVK::BlendOp(control.color_func);
attachments[i] = vk::PipelineColorBlendAttachmentState{ attachments[i] = vk::PipelineColorBlendAttachmentState{
.blendEnable = key.blend_controls[i].enable, .blendEnable = control.enable,
.srcColorBlendFactor = src_color, .srcColorBlendFactor = src_color,
.dstColorBlendFactor = dst_color, .dstColorBlendFactor = dst_color,
.colorBlendOp = color_blend, .colorBlendOp = color_blend,
@ -215,6 +215,29 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
: key.write_masks[i], : key.write_masks[i],
}; };
// On GCN GPU there is an additional mask which allows to control color components exported
// from a pixel shader. A situation possible, when the game may mask out the alpha channel,
// while it is still need to be used in blending ops. For such cases, HW will default alpha
// to 1 and perform the blending, while shader normally outputs 0 in the last component.
// Unfortunatelly, Vulkan doesn't provide any control on blend inputs, so below we detecting
// such cases and override alpha value in order to emulate HW behaviour.
const auto has_alpha_masked_out =
(key.cb_shader_mask.GetMask(i) & Liverpool::ColorBufferMask::ComponentA) == 0;
const auto has_src_alpha_in_src_blend = src_color == vk::BlendFactor::eSrcAlpha ||
src_color == vk::BlendFactor::eOneMinusSrcAlpha;
const auto has_src_alpha_in_dst_blend = dst_color == vk::BlendFactor::eSrcAlpha ||
dst_color == vk::BlendFactor::eOneMinusSrcAlpha;
if (has_alpha_masked_out && has_src_alpha_in_src_blend) {
attachments[i].srcColorBlendFactor = src_color == vk::BlendFactor::eSrcAlpha
? vk::BlendFactor::eOne
: vk::BlendFactor::eZero; // 1-A
}
if (has_alpha_masked_out && has_src_alpha_in_dst_blend) {
attachments[i].dstColorBlendFactor = dst_color == vk::BlendFactor::eSrcAlpha
? vk::BlendFactor::eOne
: vk::BlendFactor::eZero; // 1-A
}
} }
const vk::PipelineColorBlendStateCreateInfo color_blending = { const vk::PipelineColorBlendStateCreateInfo color_blending = {
@ -318,7 +341,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& image : stage.images) { for (const auto& image : stage.images) {
const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset); const auto tsharp = stage.ReadUd<AmdGpu::Image>(image.sgpr_base, image.dword_offset);
const auto& image_view = texture_cache.FindImageView(tsharp); const auto& image_view = texture_cache.FindImageView(tsharp, image.is_storage);
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
vk::ImageLayout::eShaderReadOnlyOptimal); vk::ImageLayout::eShaderReadOnlyOptimal);
set_writes.push_back({ set_writes.push_back({
@ -326,7 +349,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
.dstBinding = binding++, .dstBinding = binding++,
.dstArrayElement = 0, .dstArrayElement = 0,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage, .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(), .pImageInfo = &image_infos.back(),
}); });
} }
@ -387,11 +411,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges_merged{ranges[0]}; boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges_merged{ranges[0]};
for (auto range : ranges) { for (auto range : ranges) {
auto& prev_range = ranges.back(); auto& prev_range = ranges_merged.back();
if (prev_range.end_address < range.base_address) { if (prev_range.end_address < range.base_address) {
ranges_merged.emplace_back(range); ranges_merged.emplace_back(range);
} else { } else {
ranges_merged.back().end_address = std::max(prev_range.end_address, range.end_address); prev_range.end_address = std::max(prev_range.end_address, range.end_address);
} }
} }

View File

@ -46,6 +46,7 @@ struct GraphicsPipelineKey {
Liverpool::CullMode cull_mode; Liverpool::CullMode cull_mode;
Liverpool::FrontFace front_face; Liverpool::FrontFace front_face;
Liverpool::ClipSpace clip_space; Liverpool::ClipSpace clip_space;
Liverpool::ColorBufferMask cb_shader_mask{};
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls; std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks; std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;

View File

@ -132,6 +132,7 @@ void PipelineCache::RefreshGraphicsKey() {
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
!col_buf.info.blend_bypass); !col_buf.info.blend_bypass);
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
key.cb_shader_mask = regs.color_shader_mask;
++remapped_cb; ++remapped_cb;
} }

View File

@ -60,13 +60,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
}); });
} }
if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) { if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) {
const bool is_clear = regs.depth_render_control.depth_clear_enable;
const auto& image_view = const auto& image_view =
texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent); texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
depth_attachment = { depth_attachment = {
.imageView = *image_view.image_view, .imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral, .imageLayout = vk::ImageLayout::eGeneral,
.loadOp = vk::AttachmentLoadOp::eLoad, .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore, .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
.stencil = regs.stencil_clear}},
}; };
num_depth_attachments++; num_depth_attachments++;
} }

View File

@ -160,10 +160,10 @@ ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& vi
return slot_image_views[view_id]; return slot_image_views[view_id];
} }
ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) { ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storage) {
Image& image = FindImage(ImageInfo{desc}, desc.Address()); Image& image = FindImage(ImageInfo{desc}, desc.Address());
if (image.info.is_storage) { if (is_storage) {
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite);
} else { } else {
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead); image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead);
@ -194,6 +194,10 @@ ImageView& TextureCache::DepthTarget(const AmdGpu::Liverpool::DepthBuffer& buffe
auto& image = FindImage(info, buffer.Address(), false); auto& image = FindImage(info, buffer.Address(), false);
image.flags &= ~ImageFlagBits::CpuModified; image.flags &= ~ImageFlagBits::CpuModified;
image.Transit(vk::ImageLayout::eDepthStencilAttachmentOptimal,
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead);
ImageViewInfo view_info; ImageViewInfo view_info;
view_info.format = info.pixel_format; view_info.format = info.pixel_format;
return RegisterImageView(image, view_info); return RegisterImageView(image, view_info);

View File

@ -41,7 +41,7 @@ public:
bool refresh_on_create = true); bool refresh_on_create = true);
/// Retrieves an image view with the properties of the specified image descriptor. /// Retrieves an image view with the properties of the specified image descriptor.
[[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image); [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image, bool is_storage);
/// Retrieves the render target with specified properties /// Retrieves the render target with specified properties
[[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,